├── aiy ├── __init__.py ├── _apis │ ├── __init__.py │ └── _speech.py ├── _drivers │ ├── __init__.py │ ├── _alsa.py │ ├── _tts.py │ ├── _player.py │ ├── _status_ui.py │ ├── _button.py │ ├── _led.py │ └── _recorder.py ├── assistant │ ├── __init__.py │ ├── grpc.py │ └── auth_helpers.py ├── i18n.py ├── cloudspeech.py ├── voicehat.py └── audio.py ├── commands ├── __init__.py ├── spotify │ ├── __init__.py │ └── spotify.py └── studio │ ├── __init__.py │ └── studio.py ├── requirements.txt ├── .gitignore ├── voicekitbot.service ├── config.example.py ├── slackbot.py ├── README.md └── voicekitbot.py /aiy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aiy/_apis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aiy/_drivers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aiy/assistant/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /commands/spotify/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /commands/studio/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | slackclient==1.0.5 2 | requests==2.13.0 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | env/ 3 | *.pyc 4 | resources/ 5 | deploy.sh 6 | config.py 7 | -------------------------------------------------------------------------------- /voicekitbot.service: -------------------------------------------------------------------------------- 1 | Description=Voicekitbot 2 | 3 | [Service] 4 | ExecStart=/bin/bash -c '/home/pi/AIY-voice-kit-python/env/bin/python3 -u voicekitbot/voicekitbot.py' 5 | WorkingDirectory=/home/pi/AIY-voice-kit-python 6 | Restart=always 7 | User=pi 8 | 9 | [Install] 10 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /config.example.py: -------------------------------------------------------------------------------- 1 | """ 2 | A sample config file 3 | """ 4 | 5 | slack = { 6 | "api_key": "", 7 | "bot_name": "voicekitbot", 8 | "awkbot_name": "awkbot", 9 | "channel_name": "#general" 10 | } 11 | 12 | router = { 13 | "url": "", 14 | "username": "", 15 | "password": "" 16 | } 17 | 18 | people = { 19 | "Name": [ 20 | "mac:id" 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /aiy/_drivers/_alsa.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
def sample_width_to_string(sample_width):
    """Map a sample width in bytes to the matching ALSA format name.

    Args:
        sample_width: sample width in bytes (1, 2 or 4).

    Returns:
        The ALSA format string ('s8', 's16' or 's32'), or None when the
        width is not one aplay/arecord understand.
    """
    if sample_width == 1:
        return 's8'
    if sample_width == 2:
        return 's16'
    if sample_width == 4:
        return 's32'
    return None
def init(config):
    """Module hook called at load time; the Spotify module needs no config."""
    pass


def list_actions():
    """Return the voice actions this module exposes to the dispatcher."""
    specs = (
        ("Play music",
         ["play music", "put the needle on the record"], "play"),
        ("Pause music",
         ["pause music", "stop music", "shut the front door"], "pause"),
        ("Skip track",
         ["skip track", "next track", "skip song", "next song"], "skip"),
        ("Previous track",
         ["back", "previous track", "previous song"], "back"),
    )
    return [
        {"description": description, "content": phrases, "function": function}
        for description, phrases, function in specs
    ]


def play(text):
    """Ask the Slack-side bot to start playback."""
    return {"post": "play", "say": "Playing music"}


def pause(text):
    """Ask the Slack-side bot to pause playback."""
    return {"post": "pause", "say": "Pausing music"}


def skip(text):
    """Ask the Slack-side bot to jump to the next track."""
    return {"post": "skip", "say": "Skipping track"}


def back(text):
    """Ask the Slack-side bot to return to the previous track."""
    return {"post": "back", "say": "Going back a track"}
"""Internationalization helpers."""

import gettext

_DEFAULT_LANGUAGE_CODE = 'en-US'
_LOCALE_DOMAIN = 'voice-recognizer'

# Current BCP-47 language code; callers change it via set_language_code().
_language_code = _DEFAULT_LANGUAGE_CODE

# Directory holding the gettext bundles; must be set before installing _().
_locale_dir = None


def set_locale_dir(locale_dir):
    """Remember the directory that contains the language bundles.

    Only required before calling set_language_code() with
    gettext_install=True.

    Raises:
        ValueError: if locale_dir is empty or None.
    """
    global _locale_dir
    if not locale_dir:
        raise ValueError('locale_dir must be valid')
    _locale_dir = locale_dir


def set_language_code(code, gettext_install=False):
    """Select the BCP-47 language code the speech systems should use.

    Args:
        code: a BCP-47 code; underscores are normalized to hyphens.
        gettext_install: if True, gettext's _() will be installed as a
            builtin. As this has global effect, it should only be done by
            applications.

    Raises:
        ValueError: if gettext_install is True but set_locale_dir() was
            never called.
    """
    global _language_code
    _language_code = code.replace('_', '-')

    if not gettext_install:
        return
    if not _locale_dir:
        raise ValueError('locale_dir is not set. Please call set_locale_dir().')
    translation = gettext.translation(
        _LOCALE_DOMAIN, _locale_dir, [code.replace('-', '_')], fallback=True)
    translation.install()


def get_language_code():
    """Return the BCP-47 language code the speech systems should use.

    The system locale is deliberately ignored: the Assistant API only
    supported en-US at launch, so en-US should be the default everywhere.
    """
    return _language_code
def say(player, words, lang='en-US'):
    """Say the given words with TTS.

    Renders *words* to a temporary WAV file with pico2wave (preferring a
    tmpfs directory to spare the SD card), plays it through the given
    player, and always removes the file afterwards.

    Args:
        player: To play the text-to-speech audio.
        words: string to say aloud.
        lang: language for the text-to-speech engine.
    """
    try:
        fd, tts_wav = tempfile.mkstemp(suffix='.wav', dir=TMP_DIR)
    except IOError:
        # TMP_DIR may be unavailable (e.g. no logind session); fall back
        # to the system default temporary directory.
        logger.exception('Using fallback directory for TTS output')
        fd, tts_wav = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    words = '%s' % words
    try:
        subprocess.call(['pico2wave', '--lang', lang, '-w', tts_wav, words])
        player.play_wav(tts_wav)
    finally:
        os.unlink(tts_wav)


def _main():
    """Command-line entry point: speak the words given as arguments."""
    import argparse
    from aiy import audio

    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(description='Test TTS wrapper')
    parser.add_argument('words', nargs='*', help='Words to say')
    args = parser.parse_args()

    if args.words:
        create_say(audio.get_player())(' '.join(args.words))


if __name__ == '__main__':
    _main()
class Player(object):
    """Plays short audio clips from a buffer or file."""

    def __init__(self, output_device='default'):
        # ALSA output device name, passed to aplay via '-D'.
        self._output_device = output_device

    def play_bytes(self, audio_bytes, sample_rate, sample_width=2):
        """Play raw audio from the given bytes-like object via aplay.

        Args:
            audio_bytes: raw audio data (mono)
            sample_rate: sample rate in Hertz
            sample_width: sample width in bytes (eg 2 for 16-bit audio)
        """
        cmd = [
            'aplay',
            '-q',
            '-t', 'raw',
            '-D', self._output_device,
            '-c', '1',
            # pylint: disable=W0212
            '-f', aiy._drivers._alsa.sample_width_to_string(sample_width),
            '-r', str(sample_rate),
        ]

        # Stream the samples through aplay's stdin, then wait for it to
        # finish so playback completes before we return.
        aplay = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        aplay.stdin.write(audio_bytes)
        aplay.stdin.close()
        retcode = aplay.wait()

        if retcode:
            # Log and continue: a failed playback is not fatal to callers.
            logger.error('aplay failed with %d', retcode)

    def play_wav(self, wav_path):
        """Play audio from the given WAV file.

        The file should be mono and small enough to load into memory.

        Args:
            wav_path: path to the wav file

        Raises:
            ValueError: if the file is not mono.
        """
        with wave.open(wav_path, 'r') as wav:
            if wav.getnchannels() != 1:
                raise ValueError(wav_path + ' is not a mono file')

            frames = wav.readframes(wav.getnframes())
            self.play_bytes(frames, wav.getframerate(), wav.getsampwidth())
the `@awkbot` debug channel) 38 | 39 | * Clone this repo onto the Pi into the directory: 40 | 41 | ``` 42 | /home/pi/AIY-voice-kit-python/ 43 | ``` 44 | 45 | * From this directory, start the virtualenv: 46 | 47 | ``` 48 | source env/bin/activate 49 | ``` 50 | 51 | * Move into the cloned directory 52 | 53 | ``` 54 | cd voicekitbot/ 55 | ``` 56 | 57 | * Duplicate the `config.example.py` file, rename it to `config.py` and fill in the blanks 58 | 59 | * Install the requirements: 60 | 61 | ``` 62 | pip install -r requirements.txt 63 | ``` 64 | 65 | * Ensure the script works by running it manually: 66 | 67 | ``` 68 | python voicekitbot.py 69 | ``` 70 | 71 | * Set up the script to start when you power up the Pi: 72 | 73 | ``` 74 | sudo cp voicekitbot.service /lib/systemd/system/ 75 | sudo systemctl enable voicekitbot.service 76 | ``` 77 | 78 | * To manually start/stop this service, run: 79 | 80 | ``` 81 | sudo service voicekitbot start 82 | sudo service voicekitbot stop 83 | sudo service voicekitbot status 84 | ``` 85 | -------------------------------------------------------------------------------- /commands/studio/studio.py: -------------------------------------------------------------------------------- 1 | import random 2 | import requests 3 | import re 4 | 5 | router_login = {} 6 | people = [] 7 | 8 | 9 | def init(config): 10 | global router_login 11 | global people 12 | router_login = config.router 13 | people = config.people 14 | 15 | 16 | def list_actions(): 17 | """ A list of actions in this file that can be called """ 18 | return [ 19 | { 20 | "description": "Choose someone to make the tea", 21 | "content": ["tea", "brew", "whose turn to make tea", "who's brewing up", "i want a cup of splosh"], 22 | "function": "tea" 23 | } 24 | ] 25 | 26 | 27 | def tea(text): 28 | people_in = get_people() 29 | if len(people_in) == 0: 30 | output = "Looks like I'm making my own tea again :'(" 31 | else: 32 | output = "%s, get the kettle on" % random.choice(people_in) 33 | 34 | 
def get_people():
    """Return the names of configured people currently on the router.

    Runs the full session dance against the router's web UI: grab a
    pre-session cookie, log in, fetch the connected-device JSON, then
    log out again.
    """
    session_id = retrieve_session_id()
    session_cookie = login(session_id)
    people_json = request_json(session_id, session_cookie)
    people_in = parse_json(people_json)
    logout(session_id, session_cookie)
    return people_in


def retrieve_session_id():
    """HEAD the router login page and extract the pre-session cookie value."""
    url = router_login["url"]
    r = requests.head(url)
    # Cookie value sits between the first '=' and the first '; '.
    return re.search(r"=(.*); ", r.headers["Set-Cookie"]).group(1)


def login(session_id):
    """POST the configured credentials; return the session-index cookie."""
    url = "%s/goform/login" % router_login["url"]
    data = {
        "usr": router_login["username"],
        "pwd": router_login["password"],
        "preSession": session_id,
    }
    r = requests.post(url, data=data)
    return re.search(r"sessionindex=(.*); ", r.headers["Set-Cookie"]).group(1)


def request_json(session_id, session_cookie):
    """Fetch the router's connected-device list as parsed JSON."""
    url = "%s/data/getConnectInfo.asp" % router_login["url"]
    cookies = {
        "preSession": session_id,
        "sessionindex": session_cookie,
    }
    return requests.get(url, cookies=cookies).json()


def logout(session_id, session_cookie):
    """End the router session; best-effort, the response is ignored."""
    url = "%s/goform/logout" % router_login["url"]
    cookies = {
        "preSession": session_id,
        "sessionindex": session_cookie,
    }
    requests.get(url, cookies=cookies)


def parse_json(json, people_map=None):
    """Map the router's device JSON to the list of people present.

    Args:
        json: iterable of device dicts with 'macAddr' and 'online' keys.
        people_map: optional {name: [mac, ...]} mapping; defaults to the
            module-level `people` mapping loaded from config by init().

    Returns:
        Names with at least one device whose status is 'active'.
    """
    if people_map is None:
        people_map = people
    people_in = []
    for person, person_macs in people_map.items():
        for device in json:
            if (device["macAddr"] in person_macs
                    and device["online"] == "active"):
                people_in.append(person)
                break

    return people_in
"""An API to access the Google Assistant."""

import aiy._apis._speech
import aiy.assistant.auth_helpers
import aiy.audio
import aiy.voicehat

# Global variables. They are lazily initialized.
_assistant_recognizer = None


class _AssistantRecognizer(object):
    """Your personal Google Assistant."""

    def __init__(self, credentials):
        # Reusable Assistant speech request; reset() before each recognition.
        self._request = aiy._apis._speech.AssistantSpeechRequest(credentials)
        # Shared VoiceHat recorder that feeds audio into the request.
        self._recorder = aiy.audio.get_recorder()

    def recognize(self):
        """Recognizes the user's speech and gets answers from Google Assistant.

        This function listens to the user's speech via the VoiceHat speaker and
        sends the audio to the Google Assistant Library. The response is returned in
        both text and audio.

        Returns:
            A (transcript, response_audio) pair from the Assistant response.

        Usage:
            transcript, audio = my_recognizer.recognize()
            if transcript is not None:
                print('You said ', transcript)
            aiy.audio.play_audio(audio)
        """
        self._request.reset()
        # The endpointer callback detaches the recorder once the end of the
        # utterance is detected.
        self._request.set_endpointer_cb(self._endpointer_callback)
        self._recorder.add_processor(self._request)
        response = self._request.do_request()
        return response.transcript, response.response_audio

    def _endpointer_callback(self):
        # Stop feeding audio into the request once speech has ended.
        self._recorder.remove_processor(self._request)


def get_assistant():
    """Returns a recognizer that uses Google Assistant APIs.

    Sample usage:
        button = aiy.voicehat.get_button()
        recognizer = aiy.assistant.grpc.get_assistant()
        print('Your Google Assistant is ready.')
        while True:
            print('Press the button and speak')
            button.wait_for_press()
            print('Listening...')
            transcript, audio = recognizer.recognize()
            if transcript is not None:
                print('Assistant said ', transcript)
            if audio is not None:
                aiy.audio.play_audio(audio)
    """
    global _assistant_recognizer
    if not _assistant_recognizer:
        credentials = aiy.assistant.auth_helpers.get_assistant_credentials()
        _assistant_recognizer = _AssistantRecognizer(credentials)
    return _assistant_recognizer
class _StatusUi(object):
    """Gives the user status feedback.

    The LED and optionally a trigger sound tell the user when the box is
    ready, listening or thinking.
    """

    def __init__(self):
        self._trigger_sound_wave = None
        # Map each supported status name to an LED animation.
        self._state_map = {
            "starting": aiy.voicehat.LED.PULSE_QUICK,
            "ready": aiy.voicehat.LED.BEACON_DARK,
            "listening": aiy.voicehat.LED.ON,
            "thinking": aiy.voicehat.LED.PULSE_QUICK,
            "stopping": aiy.voicehat.LED.PULSE_QUICK,
            "power-off": aiy.voicehat.LED.OFF,
            "error": aiy.voicehat.LED.BLINK_3,
        }
        aiy.voicehat.get_led().set_state(aiy.voicehat.LED.OFF)

    def set_trigger_sound_wave(self, trigger_sound_wave):
        """Set the trigger sound.

        A trigger sound is played when the status is 'listening' to indicate
        that the assistant is actively listening to the user.
        The trigger_sound_wave argument should be the path to a valid wave file.
        If it is None, the trigger sound is disabled.
        """
        if not trigger_sound_wave:
            self._trigger_sound_wave = None
            # Bug fix: return here so we never call os.path.expanduser(None)
            # below, which raised TypeError when the sound was disabled.
            return
        expanded_path = os.path.expanduser(trigger_sound_wave)
        if os.path.exists(expanded_path):
            self._trigger_sound_wave = expanded_path
        else:
            logger.warning(
                'File %s specified as trigger sound does not exist.',
                trigger_sound_wave)
            self._trigger_sound_wave = None

    def status(self, status):
        """Activate the status.

        This method updates the LED animation and, for 'listening', plays
        the optional trigger sound. Returns True if the status is valid and
        has been updated, False otherwise.
        """
        if status not in self._state_map:
            logger.warning("unsupported state: %s, must be one of %s",
                           status, ",".join(self._state_map.keys()))
            return False
        aiy.voicehat.get_led().set_state(self._state_map[status])
        if status == 'listening' and self._trigger_sound_wave:
            aiy.audio.play_wave(self._trigger_sound_wave)
        return True
_cloudspeech_recognizer = None

# Expected location of the CloudSpeech credentials file:
CLOUDSPEECH_CREDENTIALS_FILE = os.path.expanduser('~/cloud_speech.json')


class _CloudSpeechRecognizer(object):
    """A speech recognizer backed by the Google CloudSpeech APIs.
    """

    def __init__(self, credentials_file):
        # Reusable CloudSpeech request; reset() before each recognition.
        self._request = aiy._apis._speech.CloudSpeechRequest(credentials_file)
        # Shared VoiceHat recorder that feeds audio into the request.
        self._recorder = aiy.audio.get_recorder()

    def recognize(self):
        """Recognizes the user's speech and transcribes it into text.

        This function listens to the user's speech via the VoiceHat speaker. Then it
        contacts Google CloudSpeech APIs and returns a textual transcript if possible.
        """
        self._request.reset()
        # The endpointer callback detaches the recorder once the end of the
        # utterance is detected.
        self._request.set_endpointer_cb(self._endpointer_callback)
        self._recorder.add_processor(self._request)
        return self._request.do_request().transcript

    def expect_phrase(self, phrase):
        """Explicitly tells the engine that the phrase is more likely to appear.

        This method is optional and makes speech recognition more accurate
        especially when certain commands are expected.

        For example, a light control system may want to add the following commands:

            recognizer.expect_phrase('light on')
            recognizer.expect_phrase('light off')
        """
        self._request.add_phrase(phrase)

    def _endpointer_callback(self):
        # Stop feeding audio into the request once speech has ended.
        self._recorder.remove_processor(self._request)


def get_recognizer():
    """Returns a recognizer that uses Google CloudSpeech APIs.

    The recognizer is created lazily on first use and shared afterwards.

    Sample usage:
        button = aiy.voicehat.get_button()
        recognizer = aiy.cloudspeech.get_recognizer()
        while True:
            print('Press the button and speak')
            button.wait_for_press()
            text = recognizer.recognize()
            if 'light on' in text:
                turn_on_light()
            elif 'light off' in text:
                turn_off_light()
    """
    global _cloudspeech_recognizer
    if not _cloudspeech_recognizer:
        _cloudspeech_recognizer = _CloudSpeechRecognizer(CLOUDSPEECH_CREDENTIALS_FILE)
    return _cloudspeech_recognizer
def get_button():
    """Returns a driver to the VoiceHat button.

    The button driver detects edges on _GPIO_BUTTON. It can be used both
    synchronously and asynchronously.

    Synchronous usage:
        button = aiy.voicehat.get_button()
        button.wait_for_press()
        # The above function does not return until the button is pressed.
        my_recognizer.recognize()
        ...

    Asynchronous usage:
        def on_button_press(_):
            print('The button is pressed!')

        button = aiy.voicehat.get_button()
        button.on_press(on_button_press)
        # The console will print 'The button is pressed!' every time the button is
        # pressed.
        ...
        # To cancel the callback, pass None:
        button.on_press(None)
        # Calling wait_for_press() also cancels any callback.
    """
    global _voicehat_button
    if not _voicehat_button:
        _voicehat_button = aiy._drivers._button.Button(channel=_GPIO_BUTTON)
    return _voicehat_button


def get_led():
    """Returns a driver to control the VoiceHat LED light with various animations.

        led = aiy.voicehat.get_led()

        # You may set any LED animation:
        led.set_state(aiy.voicehat.LED.PULSE_QUICK)
        led.set_state(aiy.voicehat.LED.BLINK)

        # Or turn off the light but keep the driver running:
        led.set_state(aiy.voicehat.LED.OFF)
    """
    global _voicehat_led
    if not _voicehat_led:
        _voicehat_led = aiy._drivers._led.LED(channel=_GPIO_LED)
        # The LED driver runs its own animation loop; start it once here.
        _voicehat_led.start()
    return _voicehat_led


def get_status_ui():
    """Returns a driver to control the LED via statuses.

    The supported statuses are:
      - "starting"
      - "ready"
      - "listening"
      - "thinking"
      - "stopping"
      - "power-off"
      - "error"

    Optionally, a sound may be played once when the status changes to
    "listening". For example, if you have a wave file at ~/ding.wav, you may set
    the trigger sound by:
        aiy.voicehat.get_status_ui().set_trigger_sound_wave('~/ding.wav')

    To set the status, use:
        aiy.voicehat.get_status_ui().status('starting')
        aiy.voicehat.get_status_ui().status('thinking')
    """
    global _status_ui
    if not _status_ui:
        _status_ui = aiy._drivers._status_ui._StatusUi()
    return _status_ui
class Button(object):
    """Detect edges on the given GPIO channel."""

    def __init__(self,
                 channel,
                 polarity=GPIO.FALLING,
                 pull_up_down=GPIO.PUD_UP,
                 debounce_time=0.08):
        """A simple GPIO-based button driver.

        This driver supports a simple GPIO-based button. It works by detecting
        edges on the given GPIO channel. Debouncing is automatic.

        Args:
          channel: the GPIO pin number to use (BCM mode)
          polarity: the GPIO polarity to detect; either GPIO.FALLING or
            GPIO.RISING.
          pull_up_down: whether the port should be pulled up or down; defaults to
            GPIO.PUD_UP.
          debounce_time: the time used in debouncing the button in seconds.

        Raises:
          ValueError: if polarity is neither GPIO.FALLING nor GPIO.RISING.
        """
        if polarity not in [GPIO.FALLING, GPIO.RISING]:
            raise ValueError(
                'polarity must be one of: GPIO.FALLING or GPIO.RISING')

        self.channel = int(channel)
        self.polarity = polarity
        # For a RISING trigger the line must read high during debounce,
        # and low for a FALLING trigger.
        self.expected_value = polarity == GPIO.RISING
        self.debounce_time = debounce_time

        GPIO.setmode(GPIO.BCM)
        GPIO.setup(channel, GPIO.IN, pull_up_down=pull_up_down)

        self.callback = None

    def __del__(self):
        GPIO.cleanup(self.channel)

    def wait_for_press(self):
        """Wait for the button to be pressed.

        This method blocks until the button is pressed. It also cancels any
        callback registered via on_press (the edge detection is re-armed for
        polling and removed once a debounced press is seen).
        """
        GPIO.add_event_detect(self.channel, self.polarity)
        while True:
            if GPIO.event_detected(self.channel) and self._debounce():
                GPIO.remove_event_detect(self.channel)
                return
            time.sleep(0.02)

    def on_press(self, callback):
        """Call the callback whenever the button is pressed.

        Args:
          callback: a function to call whenever the button is pressed. It should
            take a single channel number. If the callback is None, the previously
            registered callback, if any, is canceled.

        Example:
          def my_button_press_handler(channel):
              print('button pressed: channel = %d' % channel)
          my_button.on_press(my_button_press_handler)
        """
        GPIO.remove_event_detect(self.channel)
        if callback:
            self.callback = callback
            GPIO.add_event_detect(
                self.channel, self.polarity, callback=self._debounce_and_callback)

    def _debounce_and_callback(self, _):
        if self._debounce():
            # Bug fix: the callback is documented to take a single channel
            # number (see on_press), but was previously invoked with no
            # arguments, so any conforming one-argument callback raised a
            # TypeError. Pass the channel as documented.
            self.callback(self.channel)

    def _debounce(self):
        """Debounce the GPIO signal.

        Check that the input holds the expected value for the debounce
        period, to avoid false trigger on short pulses.
        """
        start = time.time()
        while time.time() < start + self.debounce_time:
            if GPIO.input(self.channel) != self.expected_value:
                return False
            time.sleep(0.01)
        return True
class Voicekitbot():
    """Glue between the Google Assistant library, Slack, and voice commands.

    On construction it connects to Slack, imports every command module found
    under voicekitbot/commands, then blocks forever feeding Assistant events
    through process_event().
    """

    def __init__(self):
        self.bot = Bot()
        self.gather_commands()

        credentials = aiy.assistant.auth_helpers.get_assistant_credentials()
        with Assistant(credentials) as assistant:
            for event in assistant.start():
                self.process_event(assistant, event)

    def gather_commands(self):
        """Import every command module and collect its declared actions."""
        self.modules = {}
        self.actions = {}

        # NOTE(review): the relative path assumes the process starts in the
        # parent of the voicekitbot checkout (see voicekitbot.service).
        for directory, _, filenames in os.walk("voicekitbot/commands"):
            package = directory.replace("voicekitbot/", "").replace("/", ".")

            for filename in filenames:
                if filename.startswith("__") or not filename.endswith(".py"):
                    continue

                module_name = filename.replace(".py", "")
                module = import_module("%s.%s" % (package, module_name))

                # Each command module exposes init(config) and list_actions().
                getattr(module, "init")(config)

                self.modules[module_name] = module
                self.actions[module_name] = getattr(module, "list_actions")()

    def process_event(self, assistant, event):
        """Dispatch a single Assistant event to logging or command handling."""
        simple_statuses = {
            EventType.ON_CONVERSATION_TURN_STARTED: 'listening',
            EventType.ON_END_OF_UTTERANCE: 'thinking',
            EventType.ON_CONVERSATION_TURN_FINISHED: 'ready',
        }

        if event.type == EventType.ON_START_FINISHED:
            self.log('ready')
            if sys.stdout.isatty():
                self.log('Say "OK, Google" then speak, or press Ctrl+C to quit...')

        elif event.type in simple_statuses:
            self.log(simple_statuses[event.type])

        elif event.type == EventType.ON_ASSISTANT_ERROR and event.args and event.args['is_fatal']:
            self.log('something killed me...')
            sys.exit(1)

        elif event.type == EventType.ON_RECOGNIZING_SPEECH_FINISHED and event.args:
            self.process_command(assistant, event.args['text'])

    def process_command(self, assistant, text):
        """Match the recognized text against every registered action."""
        text = text.lower()
        self.log("I think you just said '%s'" % text)

        for module_name, module in self.modules.items():
            for action in self.actions[module_name]:
                for trigger in action["content"]:
                    if text != trigger:
                        continue
                    assistant.stop_conversation()

                    handler = getattr(module, action["function"])
                    command = handler(text)

                    aiy.audio.say(command["say"])
                    self.bot.post(command["post"])

        if (text == "what can we do"):
            assistant.stop_conversation()
            self.list_commands()

    def list_commands(self):
        """Build and log a Slack-formatted summary of available commands."""
        parts = ["```Available commands:\r"]
        for module_name, module in self.modules.items():
            parts.append("\r%s:\r" % module_name)
            for action in self.actions[module_name]:
                parts.append("- *%s*: %s\r" % (
                    '/'.join(action["content"]), action["description"]))
        parts.append("```")
        self.log("".join(parts))

    def log(self, message):
        """Mirror a message to the LED status UI and to Slack."""
        status_ui = aiy.voicehat.get_status_ui()
        status_ui.status(message)
        self.bot.log(message)


voicekitboy = Voicekitbot()
AUDIO_SAMPLE_SIZE = 2  # bytes per sample
AUDIO_SAMPLE_RATE_HZ = 16000

# Global variables. They are lazily initialized.
_voicehat_recorder = None
_voicehat_player = None
_status_ui = None


class _WaveDump(object):
    """A processor that saves recorded audio to a wave file.

    The audio format is parameterized (previously the 16000/2/1 VoiceHat
    values were hard-coded literals, silently duplicating
    AUDIO_SAMPLE_RATE_HZ and AUDIO_SAMPLE_SIZE above). Defaults keep the
    original behavior: 16 kHz, 16-bit, mono.
    """

    def __init__(self, filepath, duration,
                 sample_rate_hz=AUDIO_SAMPLE_RATE_HZ,
                 sample_width=AUDIO_SAMPLE_SIZE,
                 num_channels=1):
        """Args:
            filepath: path of the output .wav file.
            duration: maximum length in seconds; extra data is dropped.
            sample_rate_hz: sample rate of the incoming audio.
            sample_width: bytes per sample.
            num_channels: number of interleaved channels.
        """
        self._wave = wave.open(filepath, 'wb')
        self._wave.setnchannels(num_channels)
        self._wave.setsampwidth(sample_width)
        self._wave.setframerate(sample_rate_hz)
        self._bytes = 0
        # Total byte budget for the requested duration.
        self._bytes_limit = int(duration * sample_rate_hz) * num_channels * sample_width

    def add_data(self, data):
        """Append audio data, truncating once the byte limit is reached."""
        max_bytes = self._bytes_limit - self._bytes
        data = data[:max_bytes]
        self._bytes += len(data)
        if data:
            self._wave.writeframes(data)

    def is_done(self):
        """True once the requested duration has been written."""
        return self._bytes >= self._bytes_limit

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self._wave.close()
def get_player():
    """Returns a driver to control the VoiceHat speaker.

    The aiy modules automatically use this player. So usually you do not need to
    use this. Instead, use 'aiy.audio.play_wave' if you would like to play some
    audio.
    """
    global _voicehat_player
    if not _voicehat_player:
        _voicehat_player = aiy._drivers._player.Player()
    return _voicehat_player


def get_recorder():
    """Returns a driver to control the VoiceHat microphones.

    The aiy modules automatically use this recorder. So usually you do not need to
    use this.
    """
    global _voicehat_recorder
    if not _voicehat_recorder:
        _voicehat_recorder = aiy._drivers._recorder.Recorder()
    return _voicehat_recorder


def record_to_wave(filepath, duration):
    """Records an audio for the given duration to a wave file."""
    recorder = get_recorder()
    dumper = _WaveDump(filepath, duration)
    with recorder, dumper:
        recorder.add_processor(dumper)
        while not dumper.is_done():
            time.sleep(0.1)


def play_wave(wave_file):
    """Plays the given wave file.

    The wave file has to be mono and small enough to be loaded in memory.
    """
    player = get_player()
    player.play_wav(wave_file)


def play_audio(audio_data):
    """Plays the given audio data (16-bit samples at 16 kHz, per the
    module constants)."""
    player = get_player()
    player.play_bytes(audio_data, sample_width=AUDIO_SAMPLE_SIZE,
                      sample_rate=AUDIO_SAMPLE_RATE_HZ)


def say(words, lang=None):
    """Says the given words in the given language with Google TTS engine.

    If lang is specified, e.g. "en-US", it will be used to say the given words.
    Otherwise, the language from aiy.i18n will be used.
    """
    # Bug fix: aiy.i18n is not among this module's top-level imports, so
    # aiy.i18n.get_language_code() only worked if some other module had
    # already imported it. Import it locally to make this function
    # self-sufficient.
    import aiy.i18n
    if not lang:
        lang = aiy.i18n.get_language_code()
    aiy._drivers._tts.say(get_player(), words, lang=lang)
def get_status_ui():
    """Returns a driver to access the StatusUI daemon.

    The StatusUI daemon controls the LEDs in the background. It supports a list
    of statuses it is able to communicate with the LED on the Voicehat.
    """
    global _status_ui
    if not _status_ui:
        # Bug fix: the class lives in the aiy._drivers._status_ui module, not
        # on the aiy._drivers package itself (aiy/voicehat.py constructs it as
        # aiy._drivers._status_ui._StatusUi()). The old expression
        # aiy._drivers._StatusUi() raised AttributeError. The submodule is
        # also not imported at the top of this file, so import it here.
        import aiy._drivers._status_ui
        _status_ui = aiy._drivers._status_ui._StatusUi()
    return _status_ui
_OLD_CLIENT_SECRETS = os.path.expanduser('~/client_secrets.json')
_OLD_SERVICE_CREDENTIALS = os.path.expanduser('~/credentials.json')

_CACHE_DIR = os.getenv('XDG_CACHE_HOME') or os.path.expanduser('~/.cache')
_VR_CACHE_DIR = os.path.join(_CACHE_DIR, 'voice-recognizer')

_ASSISTANT_CREDENTIALS = (
    os.path.join(_VR_CACHE_DIR, 'assistant_credentials.json')
)

# Expected location of the Assistant credentials file:
_ASSISTANT_CREDENTIALS_FILE = os.path.expanduser('~/assistant.json')


def _load_credentials(credentials_path):
    """Load cached OAuth credentials and refresh the access token.

    Also migrates legacy credential files in place: older files stored an
    'access_token' and no 'scopes'; those fields are rewritten once.
    """
    migrate = False
    with open(credentials_path, 'r') as f:
        credentials_data = json.load(f)
        if 'access_token' in credentials_data:
            migrate = True
            del credentials_data['access_token']
            credentials_data['scopes'] = [_ASSISTANT_OAUTH_SCOPE]
    if migrate:
        with open(credentials_path, 'w') as f:
            json.dump(credentials_data, f)
    credentials = google.oauth2.credentials.Credentials(token=None,
                                                        **credentials_data)
    http_request = google.auth.transport.requests.Request()
    # Refresh eagerly so the caller gets a usable access token.
    credentials.refresh(http_request)
    return credentials


def _credentials_flow_interactive(client_secrets_path):
    """Run the OAuth installed-app flow, in a browser when a display is
    available and on the console otherwise."""
    flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
        client_secrets_path,
        scopes=[_ASSISTANT_OAUTH_SCOPE])
    if 'DISPLAY' in os.environ:
        credentials = flow.run_local_server()
    else:
        credentials = flow.run_console()
    return credentials


def _save_credentials(credentials_path, credentials):
    """Persist refreshable credential fields as JSON, creating parent
    directories as needed (the short-lived access token is not stored)."""
    config_path = os.path.dirname(credentials_path)
    if not os.path.isdir(config_path):
        os.makedirs(config_path)
    with open(credentials_path, 'w') as f:
        json.dump({
            'refresh_token': credentials.refresh_token,
            'token_uri': credentials.token_uri,
            'client_id': credentials.client_id,
            'client_secret': credentials.client_secret,
            'scopes': credentials.scopes
        }, f)


def _try_to_get_credentials(client_secrets):
    """Try to get credentials, or print an error and quit on failure."""

    if os.path.exists(_ASSISTANT_CREDENTIALS):
        return _load_credentials(_ASSISTANT_CREDENTIALS)

    # Robustness fix: os.mkdir fails when the parent cache directory does
    # not exist (fresh ~/.cache) and races with concurrent creation; use
    # makedirs with exist_ok, matching _save_credentials.
    os.makedirs(_VR_CACHE_DIR, exist_ok=True)

    if not os.path.exists(client_secrets) and os.path.exists(_OLD_CLIENT_SECRETS):
        client_secrets = _OLD_CLIENT_SECRETS

    if not os.path.exists(client_secrets):
        print('You need client secrets to use the Assistant API.')
        print('Follow these instructions:')
        print('    https://developers.google.com/api-client-library/python/auth/installed-app'
              '#creatingcred')
        print('and put the file at', client_secrets)
        sys.exit(1)

    if not os.getenv('DISPLAY') and not sys.stdout.isatty():
        print("""
To use the Assistant API, manually start the application from the dev terminal.
See the "Turn on the Assistant API" section of the Voice Recognizer
User's Guide for more info.""")
        sys.exit(1)

    credentials = _credentials_flow_interactive(client_secrets)
    _save_credentials(_ASSISTANT_CREDENTIALS, credentials)
    logging.info('OAuth credentials initialized: %s', _ASSISTANT_CREDENTIALS)
    return credentials


def get_assistant_credentials(credentials_file=None):
    """Return Assistant OAuth credentials, loading or interactively creating
    them as needed. Defaults to ~/assistant.json for the client secrets."""
    if not credentials_file:
        credentials_file = _ASSISTANT_CREDENTIALS_FILE
    return _try_to_get_credentials(credentials_file)
class LED:
    """Starts a background thread to show patterns with the LED.

    Simple usage:
        my_led = LED(channel = 25)
        my_led.start()
        my_led.set_state(LED.BEACON)
        my_led.stop()
    """

    OFF = 0
    ON = 1
    BLINK = 2
    BLINK_3 = 3
    BEACON = 4
    BEACON_DARK = 5
    DECAY = 6
    PULSE_SLOW = 7
    PULSE_QUICK = 8

    def __init__(self, channel):
        """Args:
            channel: GPIO pin (BCM numbering) that drives the LED.
        """
        self.animator = threading.Thread(target=self._animate, daemon=True)
        self.channel = channel
        self.iterator = None
        self.running = False
        self.state = None
        self.sleep = 0
        GPIO.setmode(GPIO.BCM)
        GPIO.setup(channel, GPIO.OUT)
        self.pwm = GPIO.PWM(channel, 100)  # 100 Hz software PWM
        self.lock = threading.Lock()

    def __del__(self):
        self.stop()
        GPIO.cleanup(self.channel)

    def start(self):
        """Start the LED driver."""
        with self.lock:  # pylint: disable=E1129
            if not self.running:
                self.running = True
                self.pwm.start(0)  # off by default
                self.animator.start()

    def stop(self):
        """Stop the LED driver and sets the LED to off."""
        # Bug fix: the original joined the animator thread while still
        # holding self.lock. _animate() acquires the same lock at the top of
        # every iteration, so if the animator was between lock acquisitions
        # (e.g. inside time.sleep), stop() would deadlock: the animator
        # blocks on the lock held by stop(), and stop() waits forever in
        # join(). Flip the flag under the lock, then join outside it.
        with self.lock:  # pylint: disable=E1129
            if not self.running:
                return
            self.running = False
        self.animator.join()
        self.pwm.stop()

    def set_state(self, state):
        """Set the LED driver's new state.

        Note the LED driver must be started for this to have any effect.
        """
        with self.lock:  # pylint: disable=E1129
            self.state = state

    def _animate(self):
        """Background loop: pick up pending state changes and step the
        current animation's duty-cycle iterator."""
        while True:
            state = None
            running = False
            with self.lock:  # pylint: disable=E1129
                state = self.state
                self.state = None
                running = self.running
            if not running:
                return
            if state:
                if not self._parse_state(state):
                    raise ValueError('unsupported state: %d' % state)
            if self.iterator:
                self.pwm.ChangeDutyCycle(next(self.iterator))
                time.sleep(self.sleep)
            else:
                # We can also wait for a state change here with a Condition.
                time.sleep(1)

    def _parse_state(self, state):
        """Translate a state constant into a duty-cycle iterator and step
        delay. Returns True if the state was recognized."""
        self.iterator = None
        self.sleep = 0.0
        handled = False

        if state == self.OFF:
            self.pwm.ChangeDutyCycle(0)
            handled = True
        elif state == self.ON:
            self.pwm.ChangeDutyCycle(100)
            handled = True
        elif state == self.BLINK:
            self.iterator = itertools.cycle([0, 100])
            self.sleep = 0.5
            handled = True
        elif state == self.BLINK_3:
            # Three on/off blinks followed by a pause.
            self.iterator = itertools.cycle([0, 100] * 3 + [0, 0])
            self.sleep = 0.25
            handled = True
        elif state == self.BEACON:
            self.iterator = itertools.cycle(
                itertools.chain([30] * 100, [100] * 8, range(100, 30, -5)))
            self.sleep = 0.05
            handled = True
        elif state == self.BEACON_DARK:
            self.iterator = itertools.cycle(
                itertools.chain([0] * 100, range(0, 30, 3), range(30, 0, -3)))
            self.sleep = 0.05
            handled = True
        elif state == self.DECAY:
            self.iterator = itertools.cycle(range(100, 0, -2))
            self.sleep = 0.05
            handled = True
        elif state == self.PULSE_SLOW:
            self.iterator = itertools.cycle(
                itertools.chain(range(0, 100, 2), range(100, 0, -2)))
            self.sleep = 0.1
            handled = True
        elif state == self.PULSE_QUICK:
            self.iterator = itertools.cycle(
                itertools.chain(range(0, 100, 5), range(100, 0, -5)))
            self.sleep = 0.05
            handled = True

        return handled
logger = logging.getLogger('recorder')


class Recorder(threading.Thread):
    """A driver to record audio from the VoiceHat microphones.

    Stream audio from microphone in a background thread and run processing
    callbacks. It reads audio in a configurable format from the microphone,
    then converts it to a known format before passing it to the processors.

    This driver accumulates input (audio samples) in a local buffer. Once the
    buffer contains more than CHUNK_S seconds, it passes the chunk to all
    processors. An audio processor defines a 'add_data' method that receives
    the chunk of audio samples to process.
    """

    CHUNK_S = 0.1  # seconds of audio delivered per processor callback

    def __init__(self, input_device='default',
                 channels=1, bytes_per_sample=2, sample_rate_hz=16000):
        """Create a Recorder with the given audio format.

        The Recorder will not start until start() is called. start() is called
        automatically if the Recorder is used in a `with`-statement.

        - input_device: name of ALSA device (for a list, run `arecord -L`)
        - channels: number of channels in audio read from the mic
        - bytes_per_sample: sample width in bytes (eg 2 for 16-bit audio)
        - sample_rate_hz: sample rate in hertz
        """

        super().__init__(daemon=True)

        self._processors = []

        self._chunk_bytes = int(self.CHUNK_S * sample_rate_hz) * channels * bytes_per_sample

        # arecord emits raw samples on stdout; we re-chunk them ourselves.
        self._cmd = [
            'arecord',
            '-q',
            '-t', 'raw',
            '-D', input_device,
            '-c', str(channels),
            # pylint: disable=W0212
            '-f', aiy._drivers._alsa.sample_width_to_string(bytes_per_sample),
            '-r', str(sample_rate_hz),
        ]
        self._arecord = None
        self._closed = False

    def add_processor(self, processor):
        """Add an audio processor.

        An audio processor is an object that has an 'add_data' method with the
        following signature:
            class MyProcessor(object):
                def add_data(self, data):
                    # processes the chunk of data here.

        The added processor may be called multiple times with chunks of audio
        data.
        """
        self._processors.append(processor)

    def remove_processor(self, processor):
        """Remove an added audio processor."""
        try:
            self._processors.remove(processor)
        except ValueError:
            # Fix: Logger.warn is a deprecated alias; use warning().
            logger.warning("processor was not found in the list")

    def run(self):
        """Reads data from arecord and passes to processors."""

        self._arecord = subprocess.Popen(self._cmd, stdout=subprocess.PIPE)
        logger.info("started recording")

        # Check for race-condition when __exit__ is called at the same time as
        # the process is started by the background thread
        if self._closed:
            self._arecord.kill()
            return

        this_chunk = b''

        while True:
            input_data = self._arecord.stdout.read(self._chunk_bytes)
            if not input_data:
                break

            this_chunk += input_data
            if len(this_chunk) >= self._chunk_bytes:
                self._handle_chunk(this_chunk[:self._chunk_bytes])
                this_chunk = this_chunk[self._chunk_bytes:]

        if not self._closed:
            logger.error('Microphone recorder died unexpectedly, aborting...')
            # sys.exit doesn't work from background threads, so use os._exit as
            # an emergency measure.
            logging.shutdown()
            os._exit(1)  # pylint: disable=protected-access

    def stop(self):
        """Stops the recorder and cleans up all resources."""
        self._closed = True
        if self._arecord:
            self._arecord.kill()

    def _handle_chunk(self, chunk):
        """Send audio chunk to all processors."""
        for p in self._processors:
            p.add_data(chunk)

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, *args):
        self.stop()
logger = logging.getLogger('speech')

AUDIO_SAMPLE_SIZE = 2  # bytes per sample
AUDIO_SAMPLE_RATE_HZ = 16000

# Expected location of the service credentials file:
SERVICE_CREDENTIALS = os.path.expanduser('~/cloud_speech.json')


_Result = collections.namedtuple('_Result', ['transcript', 'response_audio'])


class Error(Exception):
    """Base error raised by the speech request classes in this module."""
    pass


class _ChannelFactory(object):

    """Creates gRPC channels with a given configuration."""

    def __init__(self, api_host, credentials):
        self._api_host = api_host
        self._credentials = credentials
        # Credentials are refreshed once, on the first channel creation.
        self._checked = False

    def make_channel(self):
        """Creates a secure channel."""

        http_request = google.auth.transport.requests.Request()
        endpoint = self._api_host + ':443'

        if not self._checked:
            # Refresh now, to catch any errors early. Otherwise, they'll be
            # raised and swallowed somewhere inside gRPC.
            self._credentials.refresh(http_request)
            self._checked = True

        return google.auth.transport.grpc.secure_authorized_channel(
            self._credentials, http_request, endpoint)
100 | """ 101 | 102 | self._phrases.extend(phrases.get_phrases()) 103 | 104 | def add_phrase(self, phrase): 105 | """Makes the recognition more likely to recognize the given phrase.""" 106 | self._phrases.append(phrase) 107 | 108 | def set_endpointer_cb(self, cb): 109 | """Callback to invoke on end of speech.""" 110 | self._endpointer_cb = cb 111 | 112 | def set_audio_logging_enabled(self, audio_logging_enabled=True): 113 | self._audio_logging_enabled = audio_logging_enabled 114 | 115 | if audio_logging_enabled: 116 | self._audio_log_dir = tempfile.mkdtemp() 117 | self._audio_log_ix = 0 118 | 119 | def reset(self): 120 | while True: 121 | try: 122 | self._audio_queue.get(False) 123 | except queue.Empty: 124 | return 125 | 126 | self.dialog_follow_on = False 127 | 128 | def add_data(self, data): 129 | self._audio_queue.put(data) 130 | 131 | def end_audio(self): 132 | self.add_data(None) 133 | 134 | def _get_speech_context(self): 135 | """Return a SpeechContext instance to bias recognition towards certain 136 | phrases. 137 | """ 138 | return cloud_speech.SpeechContext( 139 | phrases=self._phrases, 140 | ) 141 | 142 | @abstractmethod 143 | def _make_service(self, channel): 144 | """Create a service stub. 145 | """ 146 | return 147 | 148 | @abstractmethod 149 | def _create_config_request(self): 150 | """Create a config request for the given endpoint. 151 | 152 | This is sent first to the server to configure the speech recognition. 153 | """ 154 | return 155 | 156 | @abstractmethod 157 | def _create_audio_request(self, data): 158 | """Create an audio request for the given endpoint. 159 | 160 | This is sent to the server with audio to be recognized. 161 | """ 162 | return 163 | 164 | def _request_stream(self): 165 | """Yields a config request followed by requests constructed from the 166 | audio queue. 
        """
        # First item on the stream is always the configuration request;
        # everything after it is raw audio.
        yield self._create_config_request()

        while True:
            data = self._audio_queue.get()

            # A falsy chunk is the end-of-audio sentinel: stop the
            # request stream.
            if not data:
                return

            # Mirror outgoing audio into the debug WAV, if logging was
            # started by _start_logging_request().
            if self._request_log_wav:
                self._request_log_wav.writeframes(data)

            yield self._create_audio_request(data)

    @abstractmethod
    def _create_response_stream(self, service, request_stream, deadline):
        """Given a request stream, start the gRPC call to get the response
        stream.
        """
        return

    @abstractmethod
    def _stop_sending_audio(self, resp):
        """Return true if this response says user has stopped speaking.

        This stops the request from sending further audio.
        """
        return

    @abstractmethod
    def _handle_response(self, resp):
        """Handle a response from the remote API.

        Args:
            resp: StreamingRecognizeResponse instance
        """
        return

    def _end_audio_request(self):
        """Stop queuing audio for the request and fire the endpointer
        callback, if one was registered.
        """
        self.end_audio()
        if self._endpointer_cb:
            self._endpointer_cb()

    def _handle_response_stream(self, response_stream):
        """Consume responses until the server closes the stream.

        Delegates per-response handling to the subclass hooks
        (_stop_sending_audio, _handle_response).

        Raises:
            Error: if the server reports a non-OK status.
        """
        for resp in response_stream:
            if resp.error.code != error_code.OK:
                # Make sure audio stops flowing before surfacing the error.
                self._end_audio_request()
                raise Error('Server error: ' + resp.error.message)

            if self._stop_sending_audio(resp):
                self._end_audio_request()

            self._handle_response(resp)

        # Server has closed the connection
        # `or ''` guards against a subclass _finish_request returning a
        # falsy value.
        return self._finish_request() or ''

    def _start_logging_request(self):
        """Open a WAV file to log the request audio."""
        self._audio_log_ix += 1
        request_filename = '%s/request.%03d.wav' % (
            self._audio_log_dir, self._audio_log_ix)
        logger.info('Writing request to %s', request_filename)

        self._request_log_wav = wave.open(request_filename, 'w')

        # Mono, at the same sample size/rate the microphone capture uses.
        self._request_log_wav.setnchannels(1)
        self._request_log_wav.setsampwidth(AUDIO_SAMPLE_SIZE)
        self._request_log_wav.setframerate(AUDIO_SAMPLE_RATE_HZ)

    def _finish_request(self):
        """Called after the final response is received.

        The base implementation closes the request log (if any) and
        returns an empty result; subclasses override this to return the
        accumulated transcript/audio.
        """

        if self._request_log_wav:
            self._request_log_wav.close()

        return _Result(None, None)

    def do_request(self):
        """Establishes a connection and starts sending audio to the cloud
        endpoint. Responses are handled by the subclass until one returns a
        result.

        Returns:
            namedtuple with the following fields:
                transcript: string with transcript of user query
                response_audio: optionally, an audio response from the server

        Raises speech.Error on error.
        """
        try:
            service = self._make_service(self._channel_factory.make_channel())

            response_stream = self._create_response_stream(
                service, self._request_stream(), self.DEADLINE_SECS)

            # NOTE(review): logging is enabled only after the response
            # stream is created; if gRPC consumes the (lazy) request
            # generator before this runs, the first audio chunks may not
            # be written to the log — confirm whether this ordering is
            # intentional.
            if self._audio_logging_enabled:
                self._start_logging_request()

            return self._handle_response_stream(response_stream)
        except (
                google.auth.exceptions.GoogleAuthError,
                grpc.RpcError,
        ) as exc:
            # Wrap transport/auth failures in the package-level Error so
            # callers only need to catch one exception type.
            raise Error('Exception in speech request') from exc


class CloudSpeechRequest(GenericSpeechRequest):

    """A transcription request to the Cloud Speech API.

    Args:
        credentials_file: path to service account credentials JSON file
    """

    SCOPE = 'https://www.googleapis.com/auth/cloud-platform'

    def __init__(self, credentials_file):
        # google.auth.default() discovers service-account credentials via
        # this environment variable.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_file
        credentials, _ = google.auth.default(scopes=[self.SCOPE])

        super().__init__('speech.googleapis.com', credentials)

        self.language_code = aiy.i18n.get_language_code()

        # Fail fast if the generated proto module is too old to support
        # streaming recognition.
        if not hasattr(cloud_speech, 'StreamingRecognizeRequest'):
            raise ValueError("cloud_speech_pb2.py doesn't have StreamingRecognizeRequest.")

        # Last transcript received from the server; see _handle_response.
        self._transcript = None

    def reset(self):
        """Clear per-request state so the instance can be reused."""
        super().reset()
        self._transcript = None

    def _make_service(self, channel):
        """Create the Cloud Speech gRPC stub on the given channel."""
        return cloud_speech.SpeechStub(channel)

    def _create_config_request(self):
        """Build the initial StreamingRecognizeRequest carrying the
        recognition configuration.
        """
        recognition_config = cloud_speech.RecognitionConfig(
            # There are a bunch of config options you can specify. See
            # https://goo.gl/KPZn97 for the full list.
            encoding='LINEAR16',  # raw 16-bit signed LE samples
            sample_rate=AUDIO_SAMPLE_RATE_HZ,
            # For a list of supported languages see:
            # https://cloud.google.com/speech/docs/languages.
            language_code=self.language_code,  # a BCP-47 language tag
            speech_context=self._get_speech_context(),
        )
        streaming_config = cloud_speech.StreamingRecognitionConfig(
            config=recognition_config,
            single_utterance=True,  # TODO(rodrigoq): find a way to handle pauses
        )

        return cloud_speech.StreamingRecognizeRequest(
            streaming_config=streaming_config)

    def _create_audio_request(self, data):
        """Wrap a chunk of raw audio in a StreamingRecognizeRequest."""
        return cloud_speech.StreamingRecognizeRequest(audio_content=data)

    def _create_response_stream(self, service, request_stream, deadline):
        """Start the bidirectional StreamingRecognize call."""
        return service.StreamingRecognize(request_stream, deadline)

    def _stop_sending_audio(self, resp):
        """Check the endpointer type to see if an utterance has ended."""

        if resp.endpointer_type:
            endpointer_type = cloud_speech.StreamingRecognizeResponse.EndpointerType.Name(
                resp.endpointer_type)
            logger.info('endpointer_type: %s', endpointer_type)

        END_OF_AUDIO = cloud_speech.StreamingRecognizeResponse.EndpointerType.Value('END_OF_AUDIO')
        return resp.endpointer_type == END_OF_AUDIO

    def _handle_response(self, resp):
        """Store the last transcript we received."""
        if resp.results:
            # Concatenate the top alternative of each result into a single
            # transcript string.
            self._transcript = ' '.join(
                result.alternatives[0].transcript for result in resp.results)
            logger.info('transcript: %s', self._transcript)

    def _finish_request(self):
        """Close out logging (via the base class) and return the final
        transcript; Cloud Speech produces no response audio.
        """
        super()._finish_request()
        return _Result(self._transcript, None)


class AssistantSpeechRequest(GenericSpeechRequest):

    """A request to the Assistant API, which returns audio and text."""

    def __init__(self, credentials):

        super().__init__('embeddedassistant.googleapis.com', credentials)

        # Opaque server-side state carried across turns of a conversation;
        # kept across reset() so follow-up requests stay in context.
        self._conversation_state = None
        # Audio and transcript accumulated from responses for this request.
        self._response_audio = b''
        self._transcript = None

    def reset(self):
        """Clear per-request state (but keep the conversation state)."""
        super().reset()
        self._response_audio = b''
        self._transcript = None

    def _make_service(self, channel):
        """Create the Embedded Assistant gRPC stub on the given channel."""
        return embedded_assistant_pb2.EmbeddedAssistantStub(channel)

    def _create_config_request(self):
        """Build the initial ConverseRequest carrying audio-in/out config
        and the current conversation state.
        """
        audio_in_config = embedded_assistant_pb2.AudioInConfig(
            encoding='LINEAR16',
            sample_rate_hertz=AUDIO_SAMPLE_RATE_HZ,
        )
        audio_out_config = embedded_assistant_pb2.AudioOutConfig(
            encoding='LINEAR16',
            sample_rate_hertz=AUDIO_SAMPLE_RATE_HZ,
            volume_percentage=50,
        )
        converse_state = embedded_assistant_pb2.ConverseState(
            conversation_state=self._conversation_state,
        )
        converse_config = embedded_assistant_pb2.ConverseConfig(
            audio_in_config=audio_in_config,
            audio_out_config=audio_out_config,
            converse_state=converse_state,
        )

        return embedded_assistant_pb2.ConverseRequest(config=converse_config)

    def _create_audio_request(self, data):
        """Wrap a chunk of raw audio in a ConverseRequest."""
        return embedded_assistant_pb2.ConverseRequest(audio_in=data)

    def _create_response_stream(self, service, request_stream, deadline):
        """Start the bidirectional Converse call."""
        return service.Converse(request_stream, deadline)

    def _stop_sending_audio(self, resp):
        """Stop sending audio once the server signals END_OF_UTTERANCE."""
        if resp.event_type:
            logger.info('event_type: %s', resp.event_type)

        return (resp.event_type ==
                embedded_assistant_pb2.ConverseResponse.END_OF_UTTERANCE)

    def _handle_response(self, resp):
        """Accumulate audio and text from the remote end. It will be handled
        in _finish_request().
        """

        if resp.result.spoken_request_text:
            logger.info('transcript: %s', resp.result.spoken_request_text)
            self._transcript = resp.result.spoken_request_text

        self._response_audio += resp.audio_out.audio_data

        # Carry the server's conversation state forward for the next turn.
        if resp.result.conversation_state:
            self._conversation_state = resp.result.conversation_state

        if resp.result.microphone_mode:
            # Side effect: exposes whether the Assistant expects a
            # follow-on turn. Presumably read by the caller to decide
            # whether to keep listening — its consumer is not visible in
            # this file chunk.
            self.dialog_follow_on = (
                resp.result.microphone_mode ==
                embedded_assistant_pb2.ConverseResult.DIALOG_FOLLOW_ON)

    def _finish_request(self):
        """Close out logging (via the base class), optionally log the
        response audio, and return transcript plus response audio.
        """
        super()._finish_request()

        if self._response_audio and self._audio_logging_enabled:
            self._log_audio_out(self._response_audio)

        return _Result(self._transcript, self._response_audio)

    def _log_audio_out(self, frames):
        """Write the accumulated response audio to a numbered WAV file
        alongside the request log (same _audio_log_ix).
        """
        response_filename = '%s/response.%03d.wav' % (
            self._audio_log_dir, self._audio_log_ix)
        logger.info('Writing response to %s', response_filename)

        response_wav = wave.open(response_filename, 'w')
        response_wav.setnchannels(1)
        response_wav.setsampwidth(AUDIO_SAMPLE_SIZE)
        response_wav.setframerate(AUDIO_SAMPLE_RATE_HZ)
        response_wav.writeframes(frames)
        response_wav.close()


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    # for testing: use audio from a file
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('file', nargs='?', default='test_speech.raw')
    args = parser.parse_args()

    req = CloudSpeechRequest(SERVICE_CREDENTIALS)

    # Feed the file to the request in 64 KB chunks, then mark end of audio.
    with open(args.file, 'rb') as f:
        while True:
            chunk = f.read(64000)
            if not chunk:
                break
            req.add_data(chunk)
    req.end_audio()

    # NOTE(review): 'down response:' looks like a typo (perhaps 'dumb' or
    # 'done') — left as-is since it is a runtime string; confirm intent.
    print('down response:', req.do_request())