├── aiy ├── __init__.py ├── _apis │ ├── __init__.py │ └── _speech.py ├── _drivers │ ├── __init__.py │ ├── _alsa.py │ ├── _tts.py │ ├── _player.py │ ├── _status_ui.py │ ├── _button.py │ ├── _led.py │ └── _recorder.py ├── assistant │ ├── __init__.py │ ├── grpc.py │ └── auth_helpers.py ├── i18n.py ├── cloudspeech.py ├── voicehat.py └── audio.py ├── commands ├── __init__.py ├── spotify │ ├── __init__.py │ └── spotify.py └── studio │ ├── __init__.py │ └── studio.py ├── requirements.txt ├── .gitignore ├── voicekitbot.service ├── config.example.py ├── slackbot.py ├── README.md └── voicekitbot.py /aiy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aiy/_apis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aiy/_drivers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aiy/assistant/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /commands/spotify/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /commands/studio/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | slackclient==1.0.5 2 | requests==2.13.0 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | env/ 3 | *.pyc 4 | resources/ 5 | deploy.sh 6 | config.py 7 | -------------------------------------------------------------------------------- /voicekitbot.service: -------------------------------------------------------------------------------- 1 | Description=Voicekitbot 2 | 3 | [Service] 4 | ExecStart=/bin/bash -c '/home/pi/AIY-voice-kit-python/env/bin/python3 -u voicekitbot/voicekitbot.py' 5 | WorkingDirectory=/home/pi/AIY-voice-kit-python 6 | Restart=always 7 | User=pi 8 | 9 | [Install] 10 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /config.example.py: -------------------------------------------------------------------------------- 1 | """ 2 | A sample config file 3 | """ 4 | 5 | slack = { 6 | "api_key": "", 7 | "bot_name": "voicekitbot", 8 | "awkbot_name": "awkbot", 9 | "channel_name": "#general" 10 | } 11 | 12 | router = { 13 | "url": "", 14 | "username": "", 15 | "password": "" 16 | } 17 | 18 | people = { 19 | "Name": [ 20 | "mac:id" 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /aiy/_drivers/_alsa.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
def sample_width_to_string(sample_width):
    """Map a sample width in bytes to the matching ALSA format name.

    Args:
        sample_width: sample width in bytes (1, 2 or 4).

    Returns:
        The ALSA format string ('s8', 's16' or 's32'), or None when the
        width is not one aplay/arecord understand.
    """
    if sample_width == 1:
        return 's8'
    if sample_width == 2:
        return 's16'
    if sample_width == 4:
        return 's32'
    return None
def init(config):
    """Module hook called at load time; the Spotify module needs no config."""
    pass


def list_actions():
    """Return the voice actions this module exposes to the dispatcher."""
    specs = (
        ("Play music",
         ["play music", "put the needle on the record"], "play"),
        ("Pause music",
         ["pause music", "stop music", "shut the front door"], "pause"),
        ("Skip track",
         ["skip track", "next track", "skip song", "next song"], "skip"),
        ("Previous track",
         ["back", "previous track", "previous song"], "back"),
    )
    return [
        {"description": description, "content": phrases, "function": function}
        for description, phrases, function in specs
    ]


def play(text):
    """Ask the Slack-side bot to start playback."""
    return {"post": "play", "say": "Playing music"}


def pause(text):
    """Ask the Slack-side bot to pause playback."""
    return {"post": "pause", "say": "Pausing music"}


def skip(text):
    """Ask the Slack-side bot to jump to the next track."""
    return {"post": "skip", "say": "Skipping track"}


def back(text):
    """Ask the Slack-side bot to return to the previous track."""
    return {"post": "back", "say": "Going back a track"}
"""Internationalization helpers."""

import gettext

_DEFAULT_LANGUAGE_CODE = 'en-US'
_LOCALE_DOMAIN = 'voice-recognizer'

# Current BCP-47 language code; callers change it via set_language_code().
_language_code = _DEFAULT_LANGUAGE_CODE

# Directory holding the gettext bundles; must be set before installing _().
_locale_dir = None


def set_locale_dir(locale_dir):
    """Remember the directory that contains the language bundles.

    Only required before calling set_language_code() with
    gettext_install=True.

    Raises:
        ValueError: if locale_dir is empty or None.
    """
    global _locale_dir
    if not locale_dir:
        raise ValueError('locale_dir must be valid')
    _locale_dir = locale_dir


def set_language_code(code, gettext_install=False):
    """Select the BCP-47 language code the speech systems should use.

    Args:
        code: a BCP-47 code; underscores are normalized to hyphens.
        gettext_install: if True, gettext's _() will be installed as a
            builtin. As this has global effect, it should only be done by
            applications.

    Raises:
        ValueError: if gettext_install is True but set_locale_dir() was
            never called.
    """
    global _language_code
    _language_code = code.replace('_', '-')

    if not gettext_install:
        return
    if not _locale_dir:
        raise ValueError('locale_dir is not set. Please call set_locale_dir().')
    translation = gettext.translation(
        _LOCALE_DOMAIN, _locale_dir, [code.replace('-', '_')], fallback=True)
    translation.install()


def get_language_code():
    """Return the BCP-47 language code the speech systems should use.

    The system locale is deliberately ignored: the Assistant API only
    supported en-US at launch, so en-US should be the default everywhere.
    """
    return _language_code
def say(player, words, lang='en-US'):
    """Say the given words with TTS.

    Renders *words* to a temporary WAV file with pico2wave (preferring a
    tmpfs directory to spare the SD card), plays it through the given
    player, and always removes the file afterwards.

    Args:
        player: To play the text-to-speech audio.
        words: string to say aloud.
        lang: language for the text-to-speech engine.
    """
    try:
        fd, tts_wav = tempfile.mkstemp(suffix='.wav', dir=TMP_DIR)
    except IOError:
        # TMP_DIR may be unavailable (e.g. no logind session); fall back
        # to the system default temporary directory.
        logger.exception('Using fallback directory for TTS output')
        fd, tts_wav = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    words = '%s' % words
    try:
        subprocess.call(['pico2wave', '--lang', lang, '-w', tts_wav, words])
        player.play_wav(tts_wav)
    finally:
        os.unlink(tts_wav)


def _main():
    """Command-line entry point: speak the words given as arguments."""
    import argparse
    from aiy import audio

    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(description='Test TTS wrapper')
    parser.add_argument('words', nargs='*', help='Words to say')
    args = parser.parse_args()

    if args.words:
        create_say(audio.get_player())(' '.join(args.words))


if __name__ == '__main__':
    _main()
class Player(object):
    """Plays short audio clips from a buffer or file."""

    def __init__(self, output_device='default'):
        # ALSA output device name, passed to aplay via '-D'.
        self._output_device = output_device

    def play_bytes(self, audio_bytes, sample_rate, sample_width=2):
        """Play raw audio from the given bytes-like object via aplay.

        Args:
            audio_bytes: raw audio data (mono)
            sample_rate: sample rate in Hertz
            sample_width: sample width in bytes (eg 2 for 16-bit audio)
        """
        cmd = [
            'aplay',
            '-q',
            '-t', 'raw',
            '-D', self._output_device,
            '-c', '1',
            # pylint: disable=W0212
            '-f', aiy._drivers._alsa.sample_width_to_string(sample_width),
            '-r', str(sample_rate),
        ]

        # Stream the samples through aplay's stdin, then wait for it to
        # finish so playback completes before we return.
        aplay = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        aplay.stdin.write(audio_bytes)
        aplay.stdin.close()
        retcode = aplay.wait()

        if retcode:
            # Log and continue: a failed playback is not fatal to callers.
            logger.error('aplay failed with %d', retcode)

    def play_wav(self, wav_path):
        """Play audio from the given WAV file.

        The file should be mono and small enough to load into memory.

        Args:
            wav_path: path to the wav file

        Raises:
            ValueError: if the file is not mono.
        """
        with wave.open(wav_path, 'r') as wav:
            if wav.getnchannels() != 1:
                raise ValueError(wav_path + ' is not a mono file')

            frames = wav.readframes(wav.getnframes())
            self.play_bytes(frames, wav.getframerate(), wav.getsampwidth())
the `@awkbot` debug channel) 38 | 39 | * Clone this repo onto the Pi into the directory: 40 | 41 | ``` 42 | /home/pi/AIY-voice-kit-python/ 43 | ``` 44 | 45 | * From this directory, start the virtualenv: 46 | 47 | ``` 48 | source env/bin/activate 49 | ``` 50 | 51 | * Move into the cloned directory 52 | 53 | ``` 54 | cd voicekitbot/ 55 | ``` 56 | 57 | * Duplicate the `config.example.py` file, rename it to `config.py` and fill in the blanks 58 | 59 | * Install the requirements: 60 | 61 | ``` 62 | pip install -r requirements.txt 63 | ``` 64 | 65 | * Ensure the script works by running it manually: 66 | 67 | ``` 68 | python voicekitbot.py 69 | ``` 70 | 71 | * Set up the script to start when you power up the Pi: 72 | 73 | ``` 74 | sudo cp voicekitbot.service /lib/systemd/system/ 75 | sudo systemctl enable voicekitbot.service 76 | ``` 77 | 78 | * To manually start/stop this service, run: 79 | 80 | ``` 81 | sudo service voicekitbot start 82 | sudo service voicekitbot stop 83 | sudo service voicekitbot status 84 | ``` 85 | -------------------------------------------------------------------------------- /commands/studio/studio.py: -------------------------------------------------------------------------------- 1 | import random 2 | import requests 3 | import re 4 | 5 | router_login = {} 6 | people = [] 7 | 8 | 9 | def init(config): 10 | global router_login 11 | global people 12 | router_login = config.router 13 | people = config.people 14 | 15 | 16 | def list_actions(): 17 | """ A list of actions in this file that can be called """ 18 | return [ 19 | { 20 | "description": "Choose someone to make the tea", 21 | "content": ["tea", "brew", "whose turn to make tea", "who's brewing up", "i want a cup of splosh"], 22 | "function": "tea" 23 | } 24 | ] 25 | 26 | 27 | def tea(text): 28 | people_in = get_people() 29 | if len(people_in) == 0: 30 | output = "Looks like I'm making my own tea again :'(" 31 | else: 32 | output = "%s, get the kettle on" % random.choice(people_in) 33 | 34 | 
def get_people():
    """Return the names of configured people currently on the router.

    Runs the full session dance against the router's web UI: grab a
    pre-session cookie, log in, fetch the connected-device JSON, then
    log out again.
    """
    session_id = retrieve_session_id()
    session_cookie = login(session_id)
    people_json = request_json(session_id, session_cookie)
    people_in = parse_json(people_json)
    logout(session_id, session_cookie)
    return people_in


def retrieve_session_id():
    """HEAD the router login page and extract the pre-session cookie value."""
    url = router_login["url"]
    r = requests.head(url)
    # Cookie value sits between the first '=' and the first '; '.
    return re.search(r"=(.*); ", r.headers["Set-Cookie"]).group(1)


def login(session_id):
    """POST the configured credentials; return the session-index cookie."""
    url = "%s/goform/login" % router_login["url"]
    data = {
        "usr": router_login["username"],
        "pwd": router_login["password"],
        "preSession": session_id,
    }
    r = requests.post(url, data=data)
    return re.search(r"sessionindex=(.*); ", r.headers["Set-Cookie"]).group(1)


def request_json(session_id, session_cookie):
    """Fetch the router's connected-device list as parsed JSON."""
    url = "%s/data/getConnectInfo.asp" % router_login["url"]
    cookies = {
        "preSession": session_id,
        "sessionindex": session_cookie,
    }
    return requests.get(url, cookies=cookies).json()


def logout(session_id, session_cookie):
    """End the router session; best-effort, the response is ignored."""
    url = "%s/goform/logout" % router_login["url"]
    cookies = {
        "preSession": session_id,
        "sessionindex": session_cookie,
    }
    requests.get(url, cookies=cookies)


def parse_json(json, people_map=None):
    """Map the router's device JSON to the list of people present.

    Args:
        json: iterable of device dicts with 'macAddr' and 'online' keys.
        people_map: optional {name: [mac, ...]} mapping; defaults to the
            module-level `people` mapping loaded from config by init().

    Returns:
        Names with at least one device whose status is 'active'.
    """
    if people_map is None:
        people_map = people
    people_in = []
    for person, person_macs in people_map.items():
        for device in json:
            if (device["macAddr"] in person_macs
                    and device["online"] == "active"):
                people_in.append(person)
                break

    return people_in
"""An API to access the Google Assistant."""

import aiy._apis._speech
import aiy.assistant.auth_helpers
import aiy.audio
import aiy.voicehat

# Global variables. They are lazily initialized.
_assistant_recognizer = None


class _AssistantRecognizer(object):
    """Your personal Google Assistant."""

    def __init__(self, credentials):
        # Reusable Assistant speech request; reset() before each recognition.
        self._request = aiy._apis._speech.AssistantSpeechRequest(credentials)
        # Shared VoiceHat recorder that feeds audio into the request.
        self._recorder = aiy.audio.get_recorder()

    def recognize(self):
        """Recognizes the user's speech and gets answers from Google Assistant.

        This function listens to the user's speech via the VoiceHat speaker and
        sends the audio to the Google Assistant Library. The response is returned in
        both text and audio.

        Returns:
            A (transcript, response_audio) pair from the Assistant response.

        Usage:
            transcript, audio = my_recognizer.recognize()
            if transcript is not None:
                print('You said ', transcript)
            aiy.audio.play_audio(audio)
        """
        self._request.reset()
        # The endpointer callback detaches the recorder once the end of the
        # utterance is detected.
        self._request.set_endpointer_cb(self._endpointer_callback)
        self._recorder.add_processor(self._request)
        response = self._request.do_request()
        return response.transcript, response.response_audio

    def _endpointer_callback(self):
        # Stop feeding audio into the request once speech has ended.
        self._recorder.remove_processor(self._request)


def get_assistant():
    """Returns a recognizer that uses Google Assistant APIs.

    Sample usage:
        button = aiy.voicehat.get_button()
        recognizer = aiy.assistant.grpc.get_assistant()
        print('Your Google Assistant is ready.')
        while True:
            print('Press the button and speak')
            button.wait_for_press()
            print('Listening...')
            transcript, audio = recognizer.recognize()
            if transcript is not None:
                print('Assistant said ', transcript)
            if audio is not None:
                aiy.audio.play_audio(audio)
    """
    global _assistant_recognizer
    if not _assistant_recognizer:
        credentials = aiy.assistant.auth_helpers.get_assistant_credentials()
        _assistant_recognizer = _AssistantRecognizer(credentials)
    return _assistant_recognizer
class _StatusUi(object):
    """Gives the user status feedback.

    The LED and optionally a trigger sound tell the user when the box is
    ready, listening or thinking.
    """

    def __init__(self):
        self._trigger_sound_wave = None
        # Map each supported status name to an LED animation.
        self._state_map = {
            "starting": aiy.voicehat.LED.PULSE_QUICK,
            "ready": aiy.voicehat.LED.BEACON_DARK,
            "listening": aiy.voicehat.LED.ON,
            "thinking": aiy.voicehat.LED.PULSE_QUICK,
            "stopping": aiy.voicehat.LED.PULSE_QUICK,
            "power-off": aiy.voicehat.LED.OFF,
            "error": aiy.voicehat.LED.BLINK_3,
        }
        aiy.voicehat.get_led().set_state(aiy.voicehat.LED.OFF)

    def set_trigger_sound_wave(self, trigger_sound_wave):
        """Set the trigger sound.

        A trigger sound is played when the status is 'listening' to indicate
        that the assistant is actively listening to the user.
        The trigger_sound_wave argument should be the path to a valid wave file.
        If it is None, the trigger sound is disabled.
        """
        if not trigger_sound_wave:
            self._trigger_sound_wave = None
            # Bug fix: return here so we never call os.path.expanduser(None)
            # below, which raised TypeError when the sound was disabled.
            return
        expanded_path = os.path.expanduser(trigger_sound_wave)
        if os.path.exists(expanded_path):
            self._trigger_sound_wave = expanded_path
        else:
            logger.warning(
                'File %s specified as trigger sound does not exist.',
                trigger_sound_wave)
            self._trigger_sound_wave = None

    def status(self, status):
        """Activate the status.

        This method updates the LED animation and, for 'listening', plays
        the optional trigger sound. Returns True if the status is valid and
        has been updated, False otherwise.
        """
        if status not in self._state_map:
            logger.warning("unsupported state: %s, must be one of %s",
                           status, ",".join(self._state_map.keys()))
            return False
        aiy.voicehat.get_led().set_state(self._state_map[status])
        if status == 'listening' and self._trigger_sound_wave:
            aiy.audio.play_wave(self._trigger_sound_wave)
        return True
_cloudspeech_recognizer = None

# Expected location of the CloudSpeech credentials file:
CLOUDSPEECH_CREDENTIALS_FILE = os.path.expanduser('~/cloud_speech.json')


class _CloudSpeechRecognizer(object):
    """A speech recognizer backed by the Google CloudSpeech APIs.
    """

    def __init__(self, credentials_file):
        # Reusable CloudSpeech request; reset() before each recognition.
        self._request = aiy._apis._speech.CloudSpeechRequest(credentials_file)
        # Shared VoiceHat recorder that feeds audio into the request.
        self._recorder = aiy.audio.get_recorder()

    def recognize(self):
        """Recognizes the user's speech and transcribes it into text.

        This function listens to the user's speech via the VoiceHat speaker. Then it
        contacts Google CloudSpeech APIs and returns a textual transcript if possible.
        """
        self._request.reset()
        # The endpointer callback detaches the recorder once the end of the
        # utterance is detected.
        self._request.set_endpointer_cb(self._endpointer_callback)
        self._recorder.add_processor(self._request)
        return self._request.do_request().transcript

    def expect_phrase(self, phrase):
        """Explicitly tells the engine that the phrase is more likely to appear.

        This method is optional and makes speech recognition more accurate
        especially when certain commands are expected.

        For example, a light control system may want to add the following commands:

            recognizer.expect_phrase('light on')
            recognizer.expect_phrase('light off')
        """
        self._request.add_phrase(phrase)

    def _endpointer_callback(self):
        # Stop feeding audio into the request once speech has ended.
        self._recorder.remove_processor(self._request)


def get_recognizer():
    """Returns a recognizer that uses Google CloudSpeech APIs.

    The recognizer is created lazily on first use and shared afterwards.

    Sample usage:
        button = aiy.voicehat.get_button()
        recognizer = aiy.cloudspeech.get_recognizer()
        while True:
            print('Press the button and speak')
            button.wait_for_press()
            text = recognizer.recognize()
            if 'light on' in text:
                turn_on_light()
            elif 'light off' in text:
                turn_off_light()
    """
    global _cloudspeech_recognizer
    if not _cloudspeech_recognizer:
        _cloudspeech_recognizer = _CloudSpeechRecognizer(CLOUDSPEECH_CREDENTIALS_FILE)
    return _cloudspeech_recognizer
def get_button():
    """Returns a driver to the VoiceHat button.

    The button driver detects edges on _GPIO_BUTTON. It can be used both
    synchronously and asynchronously.

    Synchronous usage:
        button = aiy.voicehat.get_button()
        button.wait_for_press()
        # The above function does not return until the button is pressed.
        my_recognizer.recognize()
        ...

    Asynchronous usage:
        def on_button_press(_):
            print('The button is pressed!')

        button = aiy.voicehat.get_button()
        button.on_press(on_button_press)
        # The console will print 'The button is pressed!' every time the button is
        # pressed.
        ...
        # To cancel the callback, pass None:
        button.on_press(None)
        # Calling wait_for_press() also cancels any callback.
    """
    global _voicehat_button
    if not _voicehat_button:
        _voicehat_button = aiy._drivers._button.Button(channel=_GPIO_BUTTON)
    return _voicehat_button


def get_led():
    """Returns a driver to control the VoiceHat LED light with various animations.

        led = aiy.voicehat.get_led()

        # You may set any LED animation:
        led.set_state(aiy.voicehat.LED.PULSE_QUICK)
        led.set_state(aiy.voicehat.LED.BLINK)

        # Or turn off the light but keep the driver running:
        led.set_state(aiy.voicehat.LED.OFF)
    """
    global _voicehat_led
    if not _voicehat_led:
        _voicehat_led = aiy._drivers._led.LED(channel=_GPIO_LED)
        # The LED driver runs its own animation loop; start it once here.
        _voicehat_led.start()
    return _voicehat_led


def get_status_ui():
    """Returns a driver to control the LED via statuses.

    The supported statuses are:
      - "starting"
      - "ready"
      - "listening"
      - "thinking"
      - "stopping"
      - "power-off"
      - "error"

    Optionally, a sound may be played once when the status changes to
    "listening". For example, if you have a wave file at ~/ding.wav, you may set
    the trigger sound by:
        aiy.voicehat.get_status_ui().set_trigger_sound_wave('~/ding.wav')

    To set the status, use:
        aiy.voicehat.get_status_ui().status('starting')
        aiy.voicehat.get_status_ui().status('thinking')
    """
    global _status_ui
    if not _status_ui:
        _status_ui = aiy._drivers._status_ui._StatusUi()
    return _status_ui
class Button(object):
    """Detect edges on the given GPIO channel."""

    def __init__(self,
                 channel,
                 polarity=GPIO.FALLING,
                 pull_up_down=GPIO.PUD_UP,
                 debounce_time=0.08):
        """A simple GPIO-based button driver.

        This driver supports a simple GPIO-based button. It works by detecting
        edges on the given GPIO channel. Debouncing is automatic.

        Args:
          channel: the GPIO pin number to use (BCM mode)
          polarity: the GPIO polarity to detect; either GPIO.FALLING or
            GPIO.RISING.
          pull_up_down: whether the port should be pulled up or down; defaults to
            GPIO.PUD_UP.
          debounce_time: the time used in debouncing the button in seconds.

        Raises:
          ValueError: if polarity is neither GPIO.FALLING nor GPIO.RISING.
        """
        if polarity not in [GPIO.FALLING, GPIO.RISING]:
            raise ValueError(
                'polarity must be one of: GPIO.FALLING or GPIO.RISING')

        self.channel = int(channel)
        self.polarity = polarity
        # For a RISING trigger the line must read high during debounce,
        # and low for a FALLING trigger.
        self.expected_value = polarity == GPIO.RISING
        self.debounce_time = debounce_time

        GPIO.setmode(GPIO.BCM)
        GPIO.setup(channel, GPIO.IN, pull_up_down=pull_up_down)

        self.callback = None

    def __del__(self):
        GPIO.cleanup(self.channel)

    def wait_for_press(self):
        """Wait for the button to be pressed.

        This method blocks until the button is pressed. It also cancels any
        callback registered via on_press (the edge detection is re-armed for
        polling and removed once a debounced press is seen).
        """
        GPIO.add_event_detect(self.channel, self.polarity)
        while True:
            if GPIO.event_detected(self.channel) and self._debounce():
                GPIO.remove_event_detect(self.channel)
                return
            time.sleep(0.02)

    def on_press(self, callback):
        """Call the callback whenever the button is pressed.

        Args:
          callback: a function to call whenever the button is pressed. It should
            take a single channel number. If the callback is None, the previously
            registered callback, if any, is canceled.

        Example:
          def my_button_press_handler(channel):
              print('button pressed: channel = %d' % channel)
          my_button.on_press(my_button_press_handler)
        """
        GPIO.remove_event_detect(self.channel)
        if callback:
            self.callback = callback
            GPIO.add_event_detect(
                self.channel, self.polarity, callback=self._debounce_and_callback)

    def _debounce_and_callback(self, _):
        if self._debounce():
            # Bug fix: the callback is documented to take a single channel
            # number (see on_press), but was previously invoked with no
            # arguments, so any conforming one-argument callback raised a
            # TypeError. Pass the channel as documented.
            self.callback(self.channel)

    def _debounce(self):
        """Debounce the GPIO signal.

        Check that the input holds the expected value for the debounce
        period, to avoid false trigger on short pulses.
        """
        start = time.time()
        while time.time() < start + self.debounce_time:
            if GPIO.input(self.channel) != self.expected_value:
                return False
            time.sleep(0.01)
        return True
class Voicekitbot():
    """Glue between the Google Assistant library, Slack, and voice commands.

    On construction it connects to Slack, imports every command module found
    under voicekitbot/commands, then blocks forever feeding Assistant events
    through process_event().
    """

    def __init__(self):
        self.bot = Bot()
        self.gather_commands()

        credentials = aiy.assistant.auth_helpers.get_assistant_credentials()
        with Assistant(credentials) as assistant:
            for event in assistant.start():
                self.process_event(assistant, event)

    def gather_commands(self):
        """Import every command module and collect its declared actions."""
        self.modules = {}
        self.actions = {}

        # NOTE(review): the relative path assumes the process starts in the
        # parent of the voicekitbot checkout (see voicekitbot.service).
        for directory, _, filenames in os.walk("voicekitbot/commands"):
            package = directory.replace("voicekitbot/", "").replace("/", ".")

            for filename in filenames:
                if filename.startswith("__") or not filename.endswith(".py"):
                    continue

                module_name = filename.replace(".py", "")
                module = import_module("%s.%s" % (package, module_name))

                # Each command module exposes init(config) and list_actions().
                getattr(module, "init")(config)

                self.modules[module_name] = module
                self.actions[module_name] = getattr(module, "list_actions")()

    def process_event(self, assistant, event):
        """Dispatch a single Assistant event to logging or command handling."""
        simple_statuses = {
            EventType.ON_CONVERSATION_TURN_STARTED: 'listening',
            EventType.ON_END_OF_UTTERANCE: 'thinking',
            EventType.ON_CONVERSATION_TURN_FINISHED: 'ready',
        }

        if event.type == EventType.ON_START_FINISHED:
            self.log('ready')
            if sys.stdout.isatty():
                self.log('Say "OK, Google" then speak, or press Ctrl+C to quit...')

        elif event.type in simple_statuses:
            self.log(simple_statuses[event.type])

        elif event.type == EventType.ON_ASSISTANT_ERROR and event.args and event.args['is_fatal']:
            self.log('something killed me...')
            sys.exit(1)

        elif event.type == EventType.ON_RECOGNIZING_SPEECH_FINISHED and event.args:
            self.process_command(assistant, event.args['text'])

    def process_command(self, assistant, text):
        """Match the recognized text against every registered action."""
        text = text.lower()
        self.log("I think you just said '%s'" % text)

        for module_name, module in self.modules.items():
            for action in self.actions[module_name]:
                for trigger in action["content"]:
                    if text != trigger:
                        continue
                    assistant.stop_conversation()

                    handler = getattr(module, action["function"])
                    command = handler(text)

                    aiy.audio.say(command["say"])
                    self.bot.post(command["post"])

        if (text == "what can we do"):
            assistant.stop_conversation()
            self.list_commands()

    def list_commands(self):
        """Build and log a Slack-formatted summary of available commands."""
        parts = ["```Available commands:\r"]
        for module_name, module in self.modules.items():
            parts.append("\r%s:\r" % module_name)
            for action in self.actions[module_name]:
                parts.append("- *%s*: %s\r" % (
                    '/'.join(action["content"]), action["description"]))
        parts.append("```")
        self.log("".join(parts))

    def log(self, message):
        """Mirror a message to the LED status UI and to Slack."""
        status_ui = aiy.voicehat.get_status_ui()
        status_ui.status(message)
        self.bot.log(message)


voicekitboy = Voicekitbot()
AUDIO_SAMPLE_SIZE = 2  # bytes per sample
AUDIO_SAMPLE_RATE_HZ = 16000

# Global variables. They are lazily initialized.
_voicehat_recorder = None
_voicehat_player = None
_status_ui = None


class _WaveDump(object):
    """A processor that saves recorded audio to a wave file.

    The audio format is parameterized (previously the 16000/2/1 VoiceHat
    values were hard-coded literals, silently duplicating
    AUDIO_SAMPLE_RATE_HZ and AUDIO_SAMPLE_SIZE above). Defaults keep the
    original behavior: 16 kHz, 16-bit, mono.
    """

    def __init__(self, filepath, duration,
                 sample_rate_hz=AUDIO_SAMPLE_RATE_HZ,
                 sample_width=AUDIO_SAMPLE_SIZE,
                 num_channels=1):
        """Args:
            filepath: path of the output .wav file.
            duration: maximum length in seconds; extra data is dropped.
            sample_rate_hz: sample rate of the incoming audio.
            sample_width: bytes per sample.
            num_channels: number of interleaved channels.
        """
        self._wave = wave.open(filepath, 'wb')
        self._wave.setnchannels(num_channels)
        self._wave.setsampwidth(sample_width)
        self._wave.setframerate(sample_rate_hz)
        self._bytes = 0
        # Total byte budget for the requested duration.
        self._bytes_limit = int(duration * sample_rate_hz) * num_channels * sample_width

    def add_data(self, data):
        """Append audio data, truncating once the byte limit is reached."""
        max_bytes = self._bytes_limit - self._bytes
        data = data[:max_bytes]
        self._bytes += len(data)
        if data:
            self._wave.writeframes(data)

    def is_done(self):
        """True once the requested duration has been written."""
        return self._bytes >= self._bytes_limit

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self._wave.close()
def get_player():
    """Returns a driver to control the VoiceHat speaker.

    The aiy modules automatically use this player. So usually you do not need to
    use this. Instead, use 'aiy.audio.play_wave' if you would like to play some
    audio.
    """
    global _voicehat_player
    if not _voicehat_player:
        _voicehat_player = aiy._drivers._player.Player()
    return _voicehat_player


def get_recorder():
    """Returns a driver to control the VoiceHat microphones.

    The aiy modules automatically use this recorder. So usually you do not need to
    use this.
    """
    global _voicehat_recorder
    if not _voicehat_recorder:
        _voicehat_recorder = aiy._drivers._recorder.Recorder()
    return _voicehat_recorder


def record_to_wave(filepath, duration):
    """Records an audio for the given duration to a wave file."""
    recorder = get_recorder()
    dumper = _WaveDump(filepath, duration)
    with recorder, dumper:
        recorder.add_processor(dumper)
        while not dumper.is_done():
            time.sleep(0.1)


def play_wave(wave_file):
    """Plays the given wave file.

    The wave file has to be mono and small enough to be loaded in memory.
    """
    player = get_player()
    player.play_wav(wave_file)


def play_audio(audio_data):
    """Plays the given audio data (16-bit samples at 16 kHz, per the
    module constants)."""
    player = get_player()
    player.play_bytes(audio_data, sample_width=AUDIO_SAMPLE_SIZE,
                      sample_rate=AUDIO_SAMPLE_RATE_HZ)


def say(words, lang=None):
    """Says the given words in the given language with Google TTS engine.

    If lang is specified, e.g. "en-US", it will be used to say the given words.
    Otherwise, the language from aiy.i18n will be used.
    """
    # Bug fix: aiy.i18n is not among this module's top-level imports, so
    # aiy.i18n.get_language_code() only worked if some other module had
    # already imported it. Import it locally to make this function
    # self-sufficient.
    import aiy.i18n
    if not lang:
        lang = aiy.i18n.get_language_code()
    aiy._drivers._tts.say(get_player(), words, lang=lang)
def get_status_ui():
    """Returns a driver to access the StatusUI daemon.

    The StatusUI daemon controls the LEDs in the background. It supports a list
    of statuses it is able to communicate with the LED on the Voicehat.
    """
    global _status_ui
    if not _status_ui:
        # Bug fix: the class lives in the aiy._drivers._status_ui module, not
        # on the aiy._drivers package itself (aiy/voicehat.py constructs it as
        # aiy._drivers._status_ui._StatusUi()). The old expression
        # aiy._drivers._StatusUi() raised AttributeError. The submodule is
        # also not imported at the top of this file, so import it here.
        import aiy._drivers._status_ui
        _status_ui = aiy._drivers._status_ui._StatusUi()
    return _status_ui
_OLD_CLIENT_SECRETS = os.path.expanduser('~/client_secrets.json')
_OLD_SERVICE_CREDENTIALS = os.path.expanduser('~/credentials.json')

_CACHE_DIR = os.getenv('XDG_CACHE_HOME') or os.path.expanduser('~/.cache')
_VR_CACHE_DIR = os.path.join(_CACHE_DIR, 'voice-recognizer')

_ASSISTANT_CREDENTIALS = (
    os.path.join(_VR_CACHE_DIR, 'assistant_credentials.json')
)

# Expected location of the Assistant credentials file:
_ASSISTANT_CREDENTIALS_FILE = os.path.expanduser('~/assistant.json')


def _load_credentials(credentials_path):
    """Load cached OAuth credentials and refresh the access token.

    Also migrates legacy credential files in place: older files stored an
    'access_token' and no 'scopes'; those fields are rewritten once.
    """
    migrate = False
    with open(credentials_path, 'r') as f:
        credentials_data = json.load(f)
        if 'access_token' in credentials_data:
            migrate = True
            del credentials_data['access_token']
            credentials_data['scopes'] = [_ASSISTANT_OAUTH_SCOPE]
    if migrate:
        with open(credentials_path, 'w') as f:
            json.dump(credentials_data, f)
    credentials = google.oauth2.credentials.Credentials(token=None,
                                                        **credentials_data)
    http_request = google.auth.transport.requests.Request()
    # Refresh eagerly so the caller gets a usable access token.
    credentials.refresh(http_request)
    return credentials


def _credentials_flow_interactive(client_secrets_path):
    """Run the OAuth installed-app flow, in a browser when a display is
    available and on the console otherwise."""
    flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
        client_secrets_path,
        scopes=[_ASSISTANT_OAUTH_SCOPE])
    if 'DISPLAY' in os.environ:
        credentials = flow.run_local_server()
    else:
        credentials = flow.run_console()
    return credentials


def _save_credentials(credentials_path, credentials):
    """Persist refreshable credential fields as JSON, creating parent
    directories as needed (the short-lived access token is not stored)."""
    config_path = os.path.dirname(credentials_path)
    if not os.path.isdir(config_path):
        os.makedirs(config_path)
    with open(credentials_path, 'w') as f:
        json.dump({
            'refresh_token': credentials.refresh_token,
            'token_uri': credentials.token_uri,
            'client_id': credentials.client_id,
            'client_secret': credentials.client_secret,
            'scopes': credentials.scopes
        }, f)


def _try_to_get_credentials(client_secrets):
    """Try to get credentials, or print an error and quit on failure."""

    if os.path.exists(_ASSISTANT_CREDENTIALS):
        return _load_credentials(_ASSISTANT_CREDENTIALS)

    # Robustness fix: os.mkdir fails when the parent cache directory does
    # not exist (fresh ~/.cache) and races with concurrent creation; use
    # makedirs with exist_ok, matching _save_credentials.
    os.makedirs(_VR_CACHE_DIR, exist_ok=True)

    if not os.path.exists(client_secrets) and os.path.exists(_OLD_CLIENT_SECRETS):
        client_secrets = _OLD_CLIENT_SECRETS

    if not os.path.exists(client_secrets):
        print('You need client secrets to use the Assistant API.')
        print('Follow these instructions:')
        print('    https://developers.google.com/api-client-library/python/auth/installed-app'
              '#creatingcred')
        print('and put the file at', client_secrets)
        sys.exit(1)

    if not os.getenv('DISPLAY') and not sys.stdout.isatty():
        print("""
To use the Assistant API, manually start the application from the dev terminal.
See the "Turn on the Assistant API" section of the Voice Recognizer
User's Guide for more info.""")
        sys.exit(1)

    credentials = _credentials_flow_interactive(client_secrets)
    _save_credentials(_ASSISTANT_CREDENTIALS, credentials)
    logging.info('OAuth credentials initialized: %s', _ASSISTANT_CREDENTIALS)
    return credentials


def get_assistant_credentials(credentials_file=None):
    """Return Assistant OAuth credentials, loading or interactively creating
    them as needed. Defaults to ~/assistant.json for the client secrets."""
    if not credentials_file:
        credentials_file = _ASSISTANT_CREDENTIALS_FILE
    return _try_to_get_credentials(credentials_file)
class LED:
    """Starts a background thread to show patterns with the LED.

    Simple usage:
        my_led = LED(channel = 25)
        my_led.start()
        my_led.set_state(LED.BEACON)
        my_led.stop()
    """

    OFF = 0
    ON = 1
    BLINK = 2
    BLINK_3 = 3
    BEACON = 4
    BEACON_DARK = 5
    DECAY = 6
    PULSE_SLOW = 7
    PULSE_QUICK = 8

    def __init__(self, channel):
        """Args:
            channel: GPIO pin (BCM numbering) that drives the LED.
        """
        self.animator = threading.Thread(target=self._animate, daemon=True)
        self.channel = channel
        self.iterator = None
        self.running = False
        self.state = None
        self.sleep = 0
        GPIO.setmode(GPIO.BCM)
        GPIO.setup(channel, GPIO.OUT)
        self.pwm = GPIO.PWM(channel, 100)  # 100 Hz software PWM
        self.lock = threading.Lock()

    def __del__(self):
        self.stop()
        GPIO.cleanup(self.channel)

    def start(self):
        """Start the LED driver."""
        with self.lock:  # pylint: disable=E1129
            if not self.running:
                self.running = True
                self.pwm.start(0)  # off by default
                self.animator.start()

    def stop(self):
        """Stop the LED driver and sets the LED to off."""
        # Bug fix: the original joined the animator thread while still
        # holding self.lock. _animate() acquires the same lock at the top of
        # every iteration, so if the animator was between lock acquisitions
        # (e.g. inside time.sleep), stop() would deadlock: the animator
        # blocks on the lock held by stop(), and stop() waits forever in
        # join(). Flip the flag under the lock, then join outside it.
        with self.lock:  # pylint: disable=E1129
            if not self.running:
                return
            self.running = False
        self.animator.join()
        self.pwm.stop()

    def set_state(self, state):
        """Set the LED driver's new state.

        Note the LED driver must be started for this to have any effect.
        """
        with self.lock:  # pylint: disable=E1129
            self.state = state

    def _animate(self):
        """Background loop: pick up pending state changes and step the
        current animation's duty-cycle iterator."""
        while True:
            state = None
            running = False
            with self.lock:  # pylint: disable=E1129
                state = self.state
                self.state = None
                running = self.running
            if not running:
                return
            if state:
                if not self._parse_state(state):
                    raise ValueError('unsupported state: %d' % state)
            if self.iterator:
                self.pwm.ChangeDutyCycle(next(self.iterator))
                time.sleep(self.sleep)
            else:
                # We can also wait for a state change here with a Condition.
                time.sleep(1)

    def _parse_state(self, state):
        """Translate a state constant into a duty-cycle iterator and step
        delay. Returns True if the state was recognized."""
        self.iterator = None
        self.sleep = 0.0
        handled = False

        if state == self.OFF:
            self.pwm.ChangeDutyCycle(0)
            handled = True
        elif state == self.ON:
            self.pwm.ChangeDutyCycle(100)
            handled = True
        elif state == self.BLINK:
            self.iterator = itertools.cycle([0, 100])
            self.sleep = 0.5
            handled = True
        elif state == self.BLINK_3:
            # Three on/off blinks followed by a pause.
            self.iterator = itertools.cycle([0, 100] * 3 + [0, 0])
            self.sleep = 0.25
            handled = True
        elif state == self.BEACON:
            self.iterator = itertools.cycle(
                itertools.chain([30] * 100, [100] * 8, range(100, 30, -5)))
            self.sleep = 0.05
            handled = True
        elif state == self.BEACON_DARK:
            self.iterator = itertools.cycle(
                itertools.chain([0] * 100, range(0, 30, 3), range(30, 0, -3)))
            self.sleep = 0.05
            handled = True
        elif state == self.DECAY:
            self.iterator = itertools.cycle(range(100, 0, -2))
            self.sleep = 0.05
            handled = True
        elif state == self.PULSE_SLOW:
            self.iterator = itertools.cycle(
                itertools.chain(range(0, 100, 2), range(100, 0, -2)))
            self.sleep = 0.1
            handled = True
        elif state == self.PULSE_QUICK:
            self.iterator = itertools.cycle(
                itertools.chain(range(0, 100, 5), range(100, 0, -5)))
            self.sleep = 0.05
            handled = True

        return handled
logger = logging.getLogger('recorder')


class Recorder(threading.Thread):
    """A driver to record audio from the VoiceHat microphones.

    Stream audio from microphone in a background thread and run processing
    callbacks. It reads audio in a configurable format from the microphone,
    then converts it to a known format before passing it to the processors.

    This driver accumulates input (audio samples) in a local buffer. Once the
    buffer contains more than CHUNK_S seconds, it passes the chunk to all
    processors. An audio processor defines a 'add_data' method that receives
    the chunk of audio samples to process.
    """

    CHUNK_S = 0.1  # seconds of audio delivered per processor callback

    def __init__(self, input_device='default',
                 channels=1, bytes_per_sample=2, sample_rate_hz=16000):
        """Create a Recorder with the given audio format.

        The Recorder will not start until start() is called. start() is called
        automatically if the Recorder is used in a `with`-statement.

        - input_device: name of ALSA device (for a list, run `arecord -L`)
        - channels: number of channels in audio read from the mic
        - bytes_per_sample: sample width in bytes (eg 2 for 16-bit audio)
        - sample_rate_hz: sample rate in hertz
        """

        super().__init__(daemon=True)

        self._processors = []

        self._chunk_bytes = int(self.CHUNK_S * sample_rate_hz) * channels * bytes_per_sample

        # arecord emits raw samples on stdout; we re-chunk them ourselves.
        self._cmd = [
            'arecord',
            '-q',
            '-t', 'raw',
            '-D', input_device,
            '-c', str(channels),
            # pylint: disable=W0212
            '-f', aiy._drivers._alsa.sample_width_to_string(bytes_per_sample),
            '-r', str(sample_rate_hz),
        ]
        self._arecord = None
        self._closed = False

    def add_processor(self, processor):
        """Add an audio processor.

        An audio processor is an object that has an 'add_data' method with the
        following signature:
            class MyProcessor(object):
                def add_data(self, data):
                    # processes the chunk of data here.

        The added processor may be called multiple times with chunks of audio
        data.
        """
        self._processors.append(processor)

    def remove_processor(self, processor):
        """Remove an added audio processor."""
        try:
            self._processors.remove(processor)
        except ValueError:
            # Fix: Logger.warn is a deprecated alias; use warning().
            logger.warning("processor was not found in the list")

    def run(self):
        """Reads data from arecord and passes to processors."""

        self._arecord = subprocess.Popen(self._cmd, stdout=subprocess.PIPE)
        logger.info("started recording")

        # Check for race-condition when __exit__ is called at the same time as
        # the process is started by the background thread
        if self._closed:
            self._arecord.kill()
            return

        this_chunk = b''

        while True:
            input_data = self._arecord.stdout.read(self._chunk_bytes)
            if not input_data:
                break

            this_chunk += input_data
            if len(this_chunk) >= self._chunk_bytes:
                self._handle_chunk(this_chunk[:self._chunk_bytes])
                this_chunk = this_chunk[self._chunk_bytes:]

        if not self._closed:
            logger.error('Microphone recorder died unexpectedly, aborting...')
            # sys.exit doesn't work from background threads, so use os._exit as
            # an emergency measure.
            logging.shutdown()
            os._exit(1)  # pylint: disable=protected-access

    def stop(self):
        """Stops the recorder and cleans up all resources."""
        self._closed = True
        if self._arecord:
            self._arecord.kill()

    def _handle_chunk(self, chunk):
        """Send audio chunk to all processors."""
        for p in self._processors:
            p.add_data(chunk)

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, *args):
        self.stop()
logger = logging.getLogger('speech')

AUDIO_SAMPLE_SIZE = 2  # bytes per sample
AUDIO_SAMPLE_RATE_HZ = 16000

# Expected location of the service credentials file:
SERVICE_CREDENTIALS = os.path.expanduser('~/cloud_speech.json')


_Result = collections.namedtuple('_Result', ['transcript', 'response_audio'])


class Error(Exception):
    """Base error raised by the speech request classes in this module."""
    pass


class _ChannelFactory(object):

    """Creates gRPC channels with a given configuration."""

    def __init__(self, api_host, credentials):
        self._api_host = api_host
        self._credentials = credentials
        # Credentials are refreshed once, on the first channel creation.
        self._checked = False

    def make_channel(self):
        """Creates a secure channel."""

        http_request = google.auth.transport.requests.Request()
        endpoint = self._api_host + ':443'

        if not self._checked:
            # Refresh now, to catch any errors early. Otherwise, they'll be
            # raised and swallowed somewhere inside gRPC.
            self._credentials.refresh(http_request)
            self._checked = True

        return google.auth.transport.grpc.secure_authorized_channel(
            self._credentials, http_request, endpoint)
100 | """ 101 | 102 | self._phrases.extend(phrases.get_phrases()) 103 | 104 | def add_phrase(self, phrase): 105 | """Makes the recognition more likely to recognize the given phrase.""" 106 | self._phrases.append(phrase) 107 | 108 | def set_endpointer_cb(self, cb): 109 | """Callback to invoke on end of speech.""" 110 | self._endpointer_cb = cb 111 | 112 | def set_audio_logging_enabled(self, audio_logging_enabled=True): 113 | self._audio_logging_enabled = audio_logging_enabled 114 | 115 | if audio_logging_enabled: 116 | self._audio_log_dir = tempfile.mkdtemp() 117 | self._audio_log_ix = 0 118 | 119 | def reset(self): 120 | while True: 121 | try: 122 | self._audio_queue.get(False) 123 | except queue.Empty: 124 | return 125 | 126 | self.dialog_follow_on = False 127 | 128 | def add_data(self, data): 129 | self._audio_queue.put(data) 130 | 131 | def end_audio(self): 132 | self.add_data(None) 133 | 134 | def _get_speech_context(self): 135 | """Return a SpeechContext instance to bias recognition towards certain 136 | phrases. 137 | """ 138 | return cloud_speech.SpeechContext( 139 | phrases=self._phrases, 140 | ) 141 | 142 | @abstractmethod 143 | def _make_service(self, channel): 144 | """Create a service stub. 145 | """ 146 | return 147 | 148 | @abstractmethod 149 | def _create_config_request(self): 150 | """Create a config request for the given endpoint. 151 | 152 | This is sent first to the server to configure the speech recognition. 153 | """ 154 | return 155 | 156 | @abstractmethod 157 | def _create_audio_request(self, data): 158 | """Create an audio request for the given endpoint. 159 | 160 | This is sent to the server with audio to be recognized. 161 | """ 162 | return 163 | 164 | def _request_stream(self): 165 | """Yields a config request followed by requests constructed from the 166 | audio queue. 
        """
        # First item on the stream is always the configuration request;
        # everything after it is raw audio.
        yield self._create_config_request()

        while True:
            data = self._audio_queue.get()

            # A falsy chunk is the end-of-audio sentinel: stop the
            # request stream.
            if not data:
                return

            # Mirror outgoing audio into the debug WAV, if logging was
            # started by _start_logging_request().
            if self._request_log_wav:
                self._request_log_wav.writeframes(data)

            yield self._create_audio_request(data)

    @abstractmethod
    def _create_response_stream(self, service, request_stream, deadline):
        """Given a request stream, start the gRPC call to get the response
        stream.
        """
        return

    @abstractmethod
    def _stop_sending_audio(self, resp):
        """Return true if this response says user has stopped speaking.

        This stops the request from sending further audio.
        """
        return

    @abstractmethod
    def _handle_response(self, resp):
        """Handle a response from the remote API.

        Args:
            resp: StreamingRecognizeResponse instance
        """
        return

    def _end_audio_request(self):
        """Stop queuing audio for the request and fire the endpointer
        callback, if one was registered.
        """
        self.end_audio()
        if self._endpointer_cb:
            self._endpointer_cb()

    def _handle_response_stream(self, response_stream):
        """Consume responses until the server closes the stream.

        Delegates per-response handling to the subclass hooks
        (_stop_sending_audio, _handle_response).

        Raises:
            Error: if the server reports a non-OK status.
        """
        for resp in response_stream:
            if resp.error.code != error_code.OK:
                # Make sure audio stops flowing before surfacing the error.
                self._end_audio_request()
                raise Error('Server error: ' + resp.error.message)

            if self._stop_sending_audio(resp):
                self._end_audio_request()

            self._handle_response(resp)

        # Server has closed the connection
        # `or ''` guards against a subclass _finish_request returning a
        # falsy value.
        return self._finish_request() or ''

    def _start_logging_request(self):
        """Open a WAV file to log the request audio."""
        self._audio_log_ix += 1
        request_filename = '%s/request.%03d.wav' % (
            self._audio_log_dir, self._audio_log_ix)
        logger.info('Writing request to %s', request_filename)

        self._request_log_wav = wave.open(request_filename, 'w')

        # Mono, at the same sample size/rate the microphone capture uses.
        self._request_log_wav.setnchannels(1)
        self._request_log_wav.setsampwidth(AUDIO_SAMPLE_SIZE)
        self._request_log_wav.setframerate(AUDIO_SAMPLE_RATE_HZ)

    def _finish_request(self):
        """Called after the final response is received.

        The base implementation closes the request log (if any) and
        returns an empty result; subclasses override this to return the
        accumulated transcript/audio.
        """

        if self._request_log_wav:
            self._request_log_wav.close()

        return _Result(None, None)

    def do_request(self):
        """Establishes a connection and starts sending audio to the cloud
        endpoint. Responses are handled by the subclass until one returns a
        result.

        Returns:
            namedtuple with the following fields:
                transcript: string with transcript of user query
                response_audio: optionally, an audio response from the server

        Raises speech.Error on error.
        """
        try:
            service = self._make_service(self._channel_factory.make_channel())

            response_stream = self._create_response_stream(
                service, self._request_stream(), self.DEADLINE_SECS)

            # NOTE(review): logging is enabled only after the response
            # stream is created; if gRPC consumes the (lazy) request
            # generator before this runs, the first audio chunks may not
            # be written to the log — confirm whether this ordering is
            # intentional.
            if self._audio_logging_enabled:
                self._start_logging_request()

            return self._handle_response_stream(response_stream)
        except (
                google.auth.exceptions.GoogleAuthError,
                grpc.RpcError,
        ) as exc:
            # Wrap transport/auth failures in the package-level Error so
            # callers only need to catch one exception type.
            raise Error('Exception in speech request') from exc


class CloudSpeechRequest(GenericSpeechRequest):

    """A transcription request to the Cloud Speech API.

    Args:
        credentials_file: path to service account credentials JSON file
    """

    SCOPE = 'https://www.googleapis.com/auth/cloud-platform'

    def __init__(self, credentials_file):
        # google.auth.default() discovers service-account credentials via
        # this environment variable.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_file
        credentials, _ = google.auth.default(scopes=[self.SCOPE])

        super().__init__('speech.googleapis.com', credentials)

        self.language_code = aiy.i18n.get_language_code()

        # Fail fast if the generated proto module is too old to support
        # streaming recognition.
        if not hasattr(cloud_speech, 'StreamingRecognizeRequest'):
            raise ValueError("cloud_speech_pb2.py doesn't have StreamingRecognizeRequest.")

        # Last transcript received from the server; see _handle_response.
        self._transcript = None

    def reset(self):
        """Clear per-request state so the instance can be reused."""
        super().reset()
        self._transcript = None

    def _make_service(self, channel):
        """Create the Cloud Speech gRPC stub on the given channel."""
        return cloud_speech.SpeechStub(channel)

    def _create_config_request(self):
        """Build the initial StreamingRecognizeRequest carrying the
        recognition configuration.
        """
        recognition_config = cloud_speech.RecognitionConfig(
            # There are a bunch of config options you can specify. See
            # https://goo.gl/KPZn97 for the full list.
            encoding='LINEAR16',  # raw 16-bit signed LE samples
            sample_rate=AUDIO_SAMPLE_RATE_HZ,
            # For a list of supported languages see:
            # https://cloud.google.com/speech/docs/languages.
            language_code=self.language_code,  # a BCP-47 language tag
            speech_context=self._get_speech_context(),
        )
        streaming_config = cloud_speech.StreamingRecognitionConfig(
            config=recognition_config,
            single_utterance=True,  # TODO(rodrigoq): find a way to handle pauses
        )

        return cloud_speech.StreamingRecognizeRequest(
            streaming_config=streaming_config)

    def _create_audio_request(self, data):
        """Wrap a chunk of raw audio in a StreamingRecognizeRequest."""
        return cloud_speech.StreamingRecognizeRequest(audio_content=data)

    def _create_response_stream(self, service, request_stream, deadline):
        """Start the bidirectional StreamingRecognize call."""
        return service.StreamingRecognize(request_stream, deadline)

    def _stop_sending_audio(self, resp):
        """Check the endpointer type to see if an utterance has ended."""

        if resp.endpointer_type:
            endpointer_type = cloud_speech.StreamingRecognizeResponse.EndpointerType.Name(
                resp.endpointer_type)
            logger.info('endpointer_type: %s', endpointer_type)

        END_OF_AUDIO = cloud_speech.StreamingRecognizeResponse.EndpointerType.Value('END_OF_AUDIO')
        return resp.endpointer_type == END_OF_AUDIO

    def _handle_response(self, resp):
        """Store the last transcript we received."""
        if resp.results:
            # Concatenate the top alternative of each result into a single
            # transcript string.
            self._transcript = ' '.join(
                result.alternatives[0].transcript for result in resp.results)
            logger.info('transcript: %s', self._transcript)

    def _finish_request(self):
        """Close out logging (via the base class) and return the final
        transcript; Cloud Speech produces no response audio.
        """
        super()._finish_request()
        return _Result(self._transcript, None)


class AssistantSpeechRequest(GenericSpeechRequest):

    """A request to the Assistant API, which returns audio and text."""

    def __init__(self, credentials):

        super().__init__('embeddedassistant.googleapis.com', credentials)

        # Opaque server-side state carried across turns of a conversation;
        # kept across reset() so follow-up requests stay in context.
        self._conversation_state = None
        # Audio and transcript accumulated from responses for this request.
        self._response_audio = b''
        self._transcript = None

    def reset(self):
        """Clear per-request state (but keep the conversation state)."""
        super().reset()
        self._response_audio = b''
        self._transcript = None

    def _make_service(self, channel):
        """Create the Embedded Assistant gRPC stub on the given channel."""
        return embedded_assistant_pb2.EmbeddedAssistantStub(channel)

    def _create_config_request(self):
        """Build the initial ConverseRequest carrying audio-in/out config
        and the current conversation state.
        """
        audio_in_config = embedded_assistant_pb2.AudioInConfig(
            encoding='LINEAR16',
            sample_rate_hertz=AUDIO_SAMPLE_RATE_HZ,
        )
        audio_out_config = embedded_assistant_pb2.AudioOutConfig(
            encoding='LINEAR16',
            sample_rate_hertz=AUDIO_SAMPLE_RATE_HZ,
            volume_percentage=50,
        )
        converse_state = embedded_assistant_pb2.ConverseState(
            conversation_state=self._conversation_state,
        )
        converse_config = embedded_assistant_pb2.ConverseConfig(
            audio_in_config=audio_in_config,
            audio_out_config=audio_out_config,
            converse_state=converse_state,
        )

        return embedded_assistant_pb2.ConverseRequest(config=converse_config)

    def _create_audio_request(self, data):
        """Wrap a chunk of raw audio in a ConverseRequest."""
        return embedded_assistant_pb2.ConverseRequest(audio_in=data)

    def _create_response_stream(self, service, request_stream, deadline):
        """Start the bidirectional Converse call."""
        return service.Converse(request_stream, deadline)

    def _stop_sending_audio(self, resp):
        """Stop sending audio once the server signals END_OF_UTTERANCE."""
        if resp.event_type:
            logger.info('event_type: %s', resp.event_type)

        return (resp.event_type ==
                embedded_assistant_pb2.ConverseResponse.END_OF_UTTERANCE)

    def _handle_response(self, resp):
        """Accumulate audio and text from the remote end. It will be handled
        in _finish_request().
        """

        if resp.result.spoken_request_text:
            logger.info('transcript: %s', resp.result.spoken_request_text)
            self._transcript = resp.result.spoken_request_text

        self._response_audio += resp.audio_out.audio_data

        # Carry the server's conversation state forward for the next turn.
        if resp.result.conversation_state:
            self._conversation_state = resp.result.conversation_state

        if resp.result.microphone_mode:
            # Side effect: exposes whether the Assistant expects a
            # follow-on turn. Presumably read by the caller to decide
            # whether to keep listening — its consumer is not visible in
            # this file chunk.
            self.dialog_follow_on = (
                resp.result.microphone_mode ==
                embedded_assistant_pb2.ConverseResult.DIALOG_FOLLOW_ON)

    def _finish_request(self):
        """Close out logging (via the base class), optionally log the
        response audio, and return transcript plus response audio.
        """
        super()._finish_request()

        if self._response_audio and self._audio_logging_enabled:
            self._log_audio_out(self._response_audio)

        return _Result(self._transcript, self._response_audio)

    def _log_audio_out(self, frames):
        """Write the accumulated response audio to a numbered WAV file
        alongside the request log (same _audio_log_ix).
        """
        response_filename = '%s/response.%03d.wav' % (
            self._audio_log_dir, self._audio_log_ix)
        logger.info('Writing response to %s', response_filename)

        response_wav = wave.open(response_filename, 'w')
        response_wav.setnchannels(1)
        response_wav.setsampwidth(AUDIO_SAMPLE_SIZE)
        response_wav.setframerate(AUDIO_SAMPLE_RATE_HZ)
        response_wav.writeframes(frames)
        response_wav.close()


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    # for testing: use audio from a file
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('file', nargs='?', default='test_speech.raw')
    args = parser.parse_args()

    req = CloudSpeechRequest(SERVICE_CREDENTIALS)

    # Feed the file to the request in 64 KB chunks, then mark end of audio.
    with open(args.file, 'rb') as f:
        while True:
            chunk = f.read(64000)
            if not chunk:
                break
            req.add_data(chunk)
    req.end_audio()

    # NOTE(review): 'down response:' looks like a typo (perhaps 'dumb' or
    # 'done') — left as-is since it is a runtime string; confirm intent.
    print('down response:', req.do_request())