├── version
├── helpers
├── __init__.py
├── browser_helpers.py
├── assistant_helpers.py
├── device_helpers.py
└── audio_helpers.py
├── config.ini
├── snowboy.tar.gz
├── Bachelor_thesis_kenan_ekici.pdf
├── .idea
├── misc.xml
├── vcs.xml
├── modules.xml
├── pepper-google-assistant.iml
└── workspace.xml
├── requirements.txt
├── __init__.py
├── static
└── styles
│ └── layout.css
├── README.md
├── .gitignore
├── templates
├── index.html
├── stt.html
└── tts.html
├── gestures.txt
├── webserver.py
├── README.rst
├── sdk
├── audiofileinput.py
├── textinput.py
└── devicetool.py
├── LICENSE
└── assistant.py
/version:
--------------------------------------------------------------------------------
1 | 1.0.4
2 |
--------------------------------------------------------------------------------
/helpers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/config.ini:
--------------------------------------------------------------------------------
1 | [IP]
2 | Host = 192.168.3.197
3 | Port = 3000
4 | Robot = 192.168.3.146
5 |
6 |
--------------------------------------------------------------------------------
/snowboy.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kenanEkici/pepper-google-assistant/HEAD/snowboy.tar.gz
--------------------------------------------------------------------------------
/Bachelor_thesis_kenan_ekici.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kenanEkici/pepper-google-assistant/HEAD/Bachelor_thesis_kenan_ekici.pdf
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | google-assistant-grpc==0.1.0
2 | google-assistant-library==0.1.1
3 | google-assistant-sdk==0.4.4
4 | google-auth==1.0.1
5 | google-auth-oauthlib==0.2.0
6 | sounddevice==0.3.11
7 | click==6.7
8 | tenacity==4.12.0
9 | futures==3.2.0
10 | pathlib2==2.3.0
11 | pyaudio==0.2.11
12 | pyasn1==0.4.2
13 | gtts==1.2.2
14 | flask==0.12.2
flask-socketio==2.9.6
16 | configparser==3.5.0
17 |
18 |
--------------------------------------------------------------------------------
/.idea/pepper-google-assistant.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
15 |
16 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Samples for Google Assistant gRPC API."""
16 |
--------------------------------------------------------------------------------
/static/styles/layout.css:
--------------------------------------------------------------------------------
1 | .message {
2 | border-radius: 50px;
3 | padding: 15px 20px;
4 | position: relative;
5 | font-weight: bold;
6 | font-size: 18px;
7 | text-align: center;
8 | list-style-type: none;
9 | }
10 |
11 | .right {
12 | margin: 0 15px 10px 50%;
13 | background-color: #2095FE;
14 | color: #fff;
15 | }
16 |
17 | .left {
18 | margin: 0 50% 10px 15px;
19 | background-color: #d7dde8;
20 | }
21 |
22 | .footer {
23 | position: absolute;
24 | bottom: 0;
25 | width: 100%;
26 | height: 60px; /* Set the fixed height of the footer here */
27 | line-height: 60px; /* Vertically center the text there */
28 | background-color: #f5f5f5;
29 | }
30 |
31 | .vertical-align {
32 | display: flex;
33 | align-items: center;
34 | }
35 |
36 | h1 {
37 | text-align: center;
38 | }
39 |
40 | /*
41 | .message.to + .message.to,
42 | .message.from + .message.from {
43 | margin-top: -10px;
44 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pepper-google-assistant
2 |
3 | The basic objective of this project was to enable Google Assistant on the Pepper Robot. Because of a lack of root access on the robot, we had to find alternative ways to make this work such as using the notorious PYNAOqi framework which provided no actual guarantee of working at all times.
4 |
We also developed a simple front end, both to follow the interaction between the robot and another person and to synthesize speech using several Text-to-Speech APIs.
6 |
7 | ### What we have achieved
8 |
9 | - Build a custom Google Assistant solution for the Pepper humanoid Robot.
10 | - Train a simple voice trigger command on which the Pepper robot will start listening when its name ("pepper") has been said.
11 | - Combine both these solutions in one solution.
12 | - Build a front end for both the Google Assistant solution and the Text-to-Speech functionality.
13 |
14 | ### Notes
15 |
Please refer to my bachelor thesis [Teach Robots to speak: Text 2 Speech Solutions for Robotics](https://www.researchgate.net/publication/359379920_Teach_Robots_to_speak_Text_2_Speech_Solutions_for_Robotics) for more details about this project.
17 |
18 |
--------------------------------------------------------------------------------
/helpers/browser_helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import os.path
16 | import tempfile
17 | import webbrowser
18 |
ASSISTANT_HTML_FILE = 'google-assistant-sdk-screen-out.html'


class SystemBrowser(object):
    """Displays Assistant HTML screen-out in the system's default browser."""

    def __init__(self):
        # One private temp directory per instance; the HTML file inside it
        # is overwritten on every display() call.
        self.tempdir = tempfile.mkdtemp()
        self.filename = os.path.join(self.tempdir, ASSISTANT_HTML_FILE)

    def display(self, html):
        """Write *html* (bytes) to the temp file and open it in a browser tab."""
        with open(self.filename, 'wb') as out:
            out.write(html)
        webbrowser.open(self.filename, new=0)


# Shared module-level browser instance used by callers of this module.
system_browser = SystemBrowser()
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # response
10 | snowboy/
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # pyenv
79 | .python-version
80 |
81 | # celery beat schedule file
82 | celerybeat-schedule
83 |
84 | # SageMath parsed files
85 | *.sage.py
86 |
87 | # Environments
88 | .env
89 | .venv
90 | env/
91 | venv/
92 | ENV/
93 | env.bak/
94 | venv.bak/
95 |
96 | # Spyder project settings
97 | .spyderproject
98 | .spyproject
99 |
100 | # Rope project settings
101 | .ropeproject
102 |
103 | # mkdocs documentation
104 | /site
105 |
106 | # mypy
107 | .mypy_cache/
108 |
--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Google Assistant
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
27 |
28 |
29 |
30 |
Google Assistant
31 |
32 |
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/gestures.txt:
--------------------------------------------------------------------------------
# Candidate ALAnimationPlayer animation paths, grouped so that a random
# member of a group can accompany a matching utterance (see
# animation_pepper below, which picks from group 0 for greetings).
gestures = [['animations/Stand/Gestures/Hey_1', 'animations/Stand/Gestures/Hey_1'],
            ['animations/Stand/Gestures/IDontKnow_1'],
            ['animations/Stand/Gestures/Me_1', 'animations/Stand/Gestures/You_1'],
            ['animations/Stand/Gestures/Please_1'],
            ['animations/Stand/Gestures/Nothing_2'],
            ['animations/Stand/Gestures/Everything_1'],
            ['animations/Stand/Gestures/Yes_1'],
            ['animations/Stand/Gestures/CalmDown_1', 'animations/Stand/Gestures/Desperate_1', 'animations/Stand/Gestures/Desperate_2', 'animations/Stand/Gestures/Desperate_4', 'animations/Stand/Gestures/Desperate_5'],
            ['animations/Stand/Gestures/Think_1'],
            ['animations/Stand/Gestures/Happy_4'],
            ['animations/Stand/Gestures/Yes_1', 'animations/Stand/Gestures/Yes_2', 'animations/Stand/Gestures/Yes_3'],
            ['animations/Stand/Gestures/Explain_1', 'animations/Stand/Gestures/Explain_2', 'animations/Stand/Gestures/Explain_3', 'animations/Stand/Gestures/Explain_4', 'animations/Stand/Gestures/Explain_5', 'animations/Stand/Gestures/Explain_6', 'animations/Stand/Gestures/Explain_7', 'animations/Stand/Gestures/Explain_8', 'animations/Stand/Gestures/Explain_10', 'animations/Stand/Gestures/Explain_11'],
            ['animations/Stand/Waiting/Think_1', 'animations/Stand/Waiting/Think_2', 'animations/Stand/Waiting/Think_3']]
14 |
15 |
def animation_pepper(self, gesture, session):
    """Play a random greeting animation on the robot when *gesture*
    contains a greeting keyword.

    NOTE(review): this is a scratch snippet (gestures.txt) — it relies on
    `gestures` and `randint` and on a `self` whose class is not shown here.
    """
    animation_player_service = session.service("ALAnimationPlayer")
    print('test1')  # leftover debug output
    if 'hi' in gesture or 'hey' in gesture:
        print('test2')  # leftover debug output
        # Pick one of the greeting animations (group 0) at random.
        gest = gestures[0][randint(0, len(gestures[0]) - 1)]
        animation_player_service.run(gest, _async=True)
23 |
24 |
25 |
def naoqi_session(self):
    """Connect to the robot's NAOqi service and return the session.

    Exits the whole process when the connection fails.
    """
    session = qi.Session()
    try:
        # Robot IP and port are hard-coded here; config.ini carries the
        # same Robot IP — presumably these should stay in sync (verify).
        session.connect("tcp://" + '192.168.3.146' + ":" + '9559')
    except RuntimeError:
        print ("Can't connect to Naoqi")
        sys.exit(1)
    return session
--------------------------------------------------------------------------------
/helpers/assistant_helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Helper functions for the Google Assistant API."""
16 |
17 | import logging
18 |
19 | from google.assistant.embedded.v1alpha2 import embedded_assistant_pb2
20 |
21 |
def log_assist_request_without_audio(assist_request):
    """Log AssistRequest fields, omitting the raw audio payload."""
    # Skip the (potentially expensive) proto copy unless DEBUG is on.
    if not logging.getLogger().isEnabledFor(logging.DEBUG):
        return
    req_copy = embedded_assistant_pb2.AssistRequest()
    req_copy.CopyFrom(assist_request)
    audio_size = len(req_copy.audio_in)
    if audio_size > 0:
        req_copy.ClearField('audio_in')
        logging.debug('AssistRequest: audio_in (%d bytes)',
                      audio_size)
        return
    logging.debug('AssistRequest: %s', req_copy)
34 |
35 |
def log_assist_response_without_audio(assist_response):
    """Log AssistResponse fields, omitting the raw audio payload."""
    # Skip the (potentially expensive) proto copy unless DEBUG is on.
    if not logging.getLogger().isEnabledFor(logging.DEBUG):
        return
    resp_copy = embedded_assistant_pb2.AssistResponse()
    resp_copy.CopyFrom(assist_response)
    if resp_copy.HasField('audio_out') and resp_copy.audio_out.audio_data:
        size = len(resp_copy.audio_out.audio_data)
        resp_copy.audio_out.ClearField('audio_data')
        # If audio_out still has other populated fields, include them in
        # the log line; otherwise log only the audio size.
        if resp_copy.audio_out.ListFields():
            logging.debug('AssistResponse: %s audio_data (%d bytes)',
                          resp_copy, size)
        else:
            logging.debug('AssistResponse: audio_data (%d bytes)', size)
        return
    logging.debug('AssistResponse: %s', resp_copy)
55 |
--------------------------------------------------------------------------------
/templates/stt.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
Frontend STT
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
49 |
50 |
51 |
52 |
Speech-To-Text and Interactive Questions
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/helpers/device_helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Helper functions for the Device Actions."""
16 |
17 | import concurrent.futures
18 | import logging
19 | import sys
20 |
21 |
# Keys used in Device Actions request payloads.
key_inputs_ = 'inputs'
key_intent_ = 'intent'
key_payload_ = 'payload'
key_commands_ = 'commands'
key_id_ = 'id'


class DeviceRequestHandler(object):
    """Asynchronous dispatcher for Device Actions commands.

    Dispatches incoming EXECUTE commands to handlers registered per intent.

    Args:
        device_id: device id to match commands against.

    Example:
        # Use as a decorator to register a handler.
        device_handler = DeviceRequestHandler('my-device')
        @device_handler.command('INTENT_NAME')
        def handler(param):
            pass
    """

    def __init__(self, device_id):
        # Single worker keeps command executions serialized in order.
        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        self.device_id = device_id
        self.handlers = {}

    def __call__(self, device_request):
        """Handle an incoming device request.

        Returns: list of concurrent.futures, one per scheduled command
        execution (empty when the request contains no EXECUTE inputs).
        """
        fs = []
        if key_inputs_ in device_request:
            # 'device_input' instead of 'input' to avoid shadowing the builtin.
            for device_input in device_request[key_inputs_]:
                if device_input[key_intent_] == 'action.devices.EXECUTE':
                    for command in device_input[key_payload_][key_commands_]:
                        fs.extend(self.submit_commands(**command))
        return fs

    def command(self, intent):
        """Register a device action handler for the given intent."""
        def decorator(fn):
            self.handlers[intent] = fn
            # Return fn so the decorated name still refers to the handler;
            # the original returned None, clobbering the function name.
            return fn
        return decorator

    def submit_commands(self, devices, execution):
        """Submit device command executions.

        Returns: a list of concurrent.futures for scheduled executions.
        """
        fs = []
        for device in devices:
            if device[key_id_] != self.device_id:
                # Lazy %-args instead of eager string interpolation.
                logging.warning('Ignoring command for unknown device: %s',
                                device[key_id_])
                continue
            if not execution:
                logging.warning('Ignoring noop execution')
                continue
            for command in execution:
                f = self.executor.submit(self.dispatch_command, **command)
                fs.append(f)
        return fs

    def dispatch_command(self, command, params=None):
        """Dispatch a device command to the appropriate handler."""
        try:
            if command in self.handlers:
                # Guard against params=None so parameterless commands work.
                self.handlers[command](**(params or {}))
            else:
                logging.warning('Unsupported command: %s: %s',
                                command, params)
        except Exception:
            logging.warning('Error during command execution',
                            exc_info=sys.exc_info())
            # Bare raise preserves the original traceback.
            raise
--------------------------------------------------------------------------------
/templates/tts.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Text-to-Speech dashboard
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
68 |
69 |
70 |
71 |
Text-to-Speech dashboard
72 |
73 |
74 |
Pepper Text-to-Speech
75 |
76 |
77 |
78 |
79 |
80 |
81 |
GTTS Text-to-Speech
82 |
83 |
84 |
85 |
86 |
87 |
113 |
114 |
115 |
116 |
117 |
--------------------------------------------------------------------------------
/webserver.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, request, send_file, render_template, jsonify
2 | import urllib3
3 | from gtts import gTTS
4 | import configparser
5 | import json
6 | import requests
7 | import base64
8 | from naoqi import ALProxy
9 | from flask_socketio import SocketIO
10 | from threading import Thread
11 | import pyaudio
12 | import wave
13 |
14 |
# Flask app plus the Socket.IO wrapper used to push transcripts to browsers.
my_server = Flask(__name__)
socketio = SocketIO(my_server)
# The following are populated by start_server() from config.ini:
stream = None      # URL of this server's /stream MP3 endpoint
wavstream = None   # URL of this server's /wavstream WAV endpoint
pepper = None      # robot IP address (NOTE(review): also shadowed by the pepper() view)
host = None        # webserver host IP
port = None        # webserver port
22 |
23 |
@my_server.route("/", methods=['GET'])
def root():
    """Serve the landing page."""
    return render_template('index.html')
27 |
28 |
@my_server.route("/texttospeech", methods=['GET'])
def tts():
    """Serve the Text-to-Speech dashboard page."""
    return render_template('tts.html')
32 |
33 |
@my_server.route("/speechtotext", methods=['GET'])
def stt():
    """Serve the Speech-to-Text dashboard page."""
    return render_template('stt.html')
37 |
38 |
@my_server.route("/googlestt", methods=['POST'])
def gsst():
    """Record microphone audio, transcribe it via the Google Speech API
    (through the cxl-services demo proxy) and push the transcript to the
    page over Socket.IO.
    """
    # start_record() blocks for the whole 10-second recording.
    start_record()
    with open('/tmp/input.wav', 'rb') as wav_file:
        audio_content = base64.b64encode(wav_file.read())

    payload = {
        "config": {
            "encoding": "LINEAR16",
            "languageCode": "en-US",
            "enableAutomaticPunctuation": 'true',
            "sampleRateHertz": 16000,
            "model": "default"
        },

        "audio": {
            "content": audio_content
        }
    }

    resp = requests.post(
        "https://cxl-services.appspot.com/proxy?url=https%3A%2F%2Fspeech.googleapis.com%2Fv1p1beta1%2Fspeech%3Arecognize",
        headers={'Content-Type': 'application/json'},
        data=json.dumps(payload))
    transcript = resp.json()['results'][0]['alternatives'][0]['transcript']
    socketio.emit('inputmsg', transcript)
    return 'success'
69 |
70 |
@my_server.route("/pepper", methods=['POST'])
def pepper():
    """Speak the posted 'input' text on the robot via ALTextToSpeech.

    NOTE(review): this view function shadows the module-level `pepper`
    global (the robot IP). It appears to work only because start_server()
    rebinds the global to the IP string before requests are served —
    confirm and consider renaming one of the two.
    """
    text = request.json.get('input')
    altts = ALProxy("ALTextToSpeech", pepper, 9559)
    altts.say(str(text))
    return "success"
77 |
78 |
@my_server.route("/gtts", methods=['POST'])
def gtts():
    """Synthesize the posted 'input' text with gTTS and save it as
    /tmp/syn.mp3 for the /stream endpoint to serve."""
    text = request.json.get('input')
    gTTS(text=text, lang='en').save("/tmp/syn.mp3")
    return "success"
85 |
86 |
@my_server.route("/gcloud", methods=['POST'])
def gcloud():
    """Proxy the posted synthesis request to the Google Cloud TTS API and
    store the returned audio as /tmp/syn.mp3.

    Returns:
        "success" (the audio is picked up later via the /stream endpoint).
    """
    headers = {
        'Content-Type': 'application/json'
    }
    resp = requests.post("https://cxl-services.appspot.com/proxy?url=https%3A%2F%2Ftexttospeech.googleapis.com%2Fv1beta1%2Ftext%3Asynthesize", headers=headers, data=json.dumps(request.get_json()))
    r = resp.json().get('audioContent')
    # base64.b64decode replaces the deprecated (removed in Python 3.9)
    # base64.decodestring, and 'wb' ensures the MP3 bytes are written
    # unmangled regardless of platform/text-mode translation.
    with open("/tmp/syn.mp3", 'wb') as file:
        file.write(base64.b64decode(r))
    return "success"
97 |
98 |
@my_server.route("/playpepper", methods=['GET'])
def play_stream():
    """Ask the robot to play the synthesized MP3; runs in a background
    thread so the HTTP response returns immediately."""
    Thread(target=play_pepper).start()
    return "success"
104 |
105 |
@my_server.route("/stream", methods=['GET'])
def stream_mp3():
    """Serve the most recently synthesized MP3, disabling client caching."""
    return send_file('/tmp/syn.mp3', cache_timeout=0)
109 |
110 |
@my_server.route("/wavstream", methods=['GET'])
def stream_wav():
    """Serve the most recent WAV response, disabling client caching."""
    return send_file('/tmp/syn.wav', cache_timeout=0)
114 |
115 |
def emit_socket(msg_type, msg):
    """Forward a message to all connected web clients over Socket.IO."""
    socketio.emit(msg_type, msg)
118 |
119 |
def play_pepper():
    """Have the robot fetch and play the MP3 stream URL (`stream` global)."""
    player = ALProxy("ALAudioPlayer", pepper, 9559)
    player.playWebStream(stream, 1, 0)
123 |
124 |
def play_asistant_response():
    """Have the robot fetch and play the WAV stream URL (`wavstream` global).

    NOTE(review): name keeps the historical 'asistant' typo because callers
    outside this file may reference it.
    """
    player = ALProxy("ALAudioPlayer", pepper, 9559)
    player.playWebStream(wavstream, 1, 0)
128 |
129 |
def start_record():
    """Record 10 seconds of 16 kHz mono audio to /tmp/input.wav.

    Blocks for the whole recording. The output format (LINEAR16, 16000 Hz,
    mono) matches the Google Speech API request built in gsst().
    """
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    CHUNK = 1024
    RECORD_SECONDS = 10
    WAVE_OUTPUT_FILENAME = "/tmp/input.wav"
    audio = pyaudio.PyAudio()

    # start recording
    wav_stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                            input=True, frames_per_buffer=CHUNK)
    # print() instead of the Python-2-only `print "..."` statement; identical
    # output on Python 2 and keeps the file parseable on Python 3.
    print("recording...")
    frames = []

    for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        frames.append(wav_stream.read(CHUNK))
    print("finished recording")

    # stop recording and release the audio device
    wav_stream.stop_stream()
    wav_stream.close()
    audio.terminate()

    wave_file = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wave_file.setnchannels(CHANNELS)
    wave_file.setsampwidth(audio.get_sample_size(FORMAT))
    wave_file.setframerate(RATE)
    wave_file.writeframes(b''.join(frames))
    wave_file.close()
160 |
161 |
def start_server():
    """Read config.ini, derive the stream URLs and robot IP, and launch
    the Socket.IO-enabled web server (blocks until shutdown)."""
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    global stream, wavstream, pepper, host, port
    config = configparser.ConfigParser()
    config.read('config.ini')

    host = config['IP']['Host']
    port = config['IP']['Port']

    # URLs the robot uses to fetch synthesized audio back from this server.
    base = "http://" + str(host) + ":" + str(port)
    stream = base + "/stream"
    wavstream = base + "/wavstream"
    pepper = str(config['IP']['Robot'])

    print("API is running")
    socketio.run(my_server, host=host, port=int(port))
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | Python Samples for the Google Assistant gRPC API
2 | ================================================
3 |
4 | This repository contains a reference sample for the ``google-assistant-grpc`` Python package_.
5 |
6 | It implements the following features:
7 |
8 | - Triggering a conversation using a key press
9 | - Audio recording of user queries (single or multiple consecutive queries)
10 | - Playback of the Assistant response
11 | - Conversation state management
12 | - Volume control
13 |
14 | .. _package: https://pypi.python.org/pypi/google-assistant-grpc
15 |
16 | Prerequisites
17 | -------------
18 |
19 | - `Python `_ (>= 3.4 recommended)
20 | - An `Actions Console Project `_
21 | - A `Google account `_
22 |
23 | Setup
24 | -----
25 |
26 | - Install Python 3
27 |
28 | - Ubuntu/Debian GNU/Linux::
29 |
30 | sudo apt-get update
31 | sudo apt-get install python3 python3-venv
32 |
33 | - `MacOSX, Windows, Other `_
34 |
35 | - Create a new virtual environment (recommended)::
36 |
37 | python3 -m venv env
38 | env/bin/python -m pip install --upgrade pip setuptools wheel
39 | source env/bin/activate
40 |
41 | Authorization
42 | -------------
43 |
44 | - Follow the steps to `configure the Actions Console project and the Google account `_.
45 | - Follow the steps to `register a new device model and download the client secrets file `_.
46 | - Generate device credentials using ``google-oauthlib-tool``:
47 |
48 | pip install --upgrade google-auth-oauthlib[tool]
49 | google-oauthlib-tool --client-secrets path/to/credentials.json --scope https://www.googleapis.com/auth/assistant-sdk-prototype --save --headless
50 |
51 | Run the samples
52 | ---------------
53 |
54 | - Install the sample dependencies::
55 |
56 | sudo apt-get install portaudio19-dev libffi-dev libssl-dev
57 | pip install --upgrade -r requirements.txt
58 |
59 | - Verify audio setup::
60 |
61 | # Record a 5 sec sample and play it back
62 | python -m audio_helpers
63 |
64 | - Run the push to talk sample. The sample records a voice query after a key press and plays back the Google Assistant's answer::
65 |
66 | python -m pushtotalk --device-id 'my-device-identifier' --device-model-id 'my-model-identifier'
67 |
68 | - Try some Google Assistant voice query like "What time is it?" or "Who am I?".
69 |
70 | - Try a device action query like "Turn on".
71 |
72 | - Run in verbose mode to see the gRPC communication with the Google Assistant API::
73 |
74 | python -m pushtotalk --device-id 'my-device-identifier' --device-model-id 'my-model-identifier' -v
75 |
76 | - Send a pre-recorded request to the Assistant::
77 |
78 | python -m pushtotalk --device-id 'my-device-identifier' --device-model-id 'my-model-identifier' -i in.wav
79 |
80 | - Save the Assistant response to a file::
81 |
82 | python -m pushtotalk --device-id 'my-device-identifier' --device-model-id 'my-model-identifier' -o out.wav
83 |
84 | - Send text requests to the Assistant::
85 |
86 | python -m textinput --device-id 'my-device-identifier' --device-model-id 'my-model-identifier'
87 |
88 | - Send a request to the Assistant from a local audio file and write the Assistant audio response to another file::
89 |
90 | python -m audiofileinput --device-id 'my-device-identifier' --device-model-id 'my-model-identifier' -i in.wav -o out.wav
91 |
92 | Troubleshooting
93 | ---------------
94 |
95 | - Verify ALSA setup::
96 |
97 | # Play a test sound
98 | speaker-test -t wav
99 |
100 | # Record and play back some audio using ALSA command-line tools
101 | arecord --format=S16_LE --duration=5 --rate=16000 --file-type=raw out.raw
102 | aplay --format=S16_LE --rate=16000 --file-type=raw out.raw
103 |
104 | - If Assistant audio is choppy, try adjusting the sound device's block size::
105 |
106 | # If using a USB speaker or dedicated soundcard, set block size to "0"
107 | # to automatically adjust the buffer size
108 | python -m audio_helpers --audio-block-size=0
109 |
110 | # If using the line-out 3.5mm audio jack on the device, set block size
111 | # to a value larger than the `ConverseResponse` audio payload size
112 | python -m audio_helpers --audio-block-size=3200
113 |
114 | # Run the Assistant sample using the best block size value found above
115 | python -m pushtotalk --audio-block-size=value
116 |
117 | - If Assistant audio is truncated, try adjusting the sound device's flush size::
118 |
119 | # Set flush size to a value larger than the audio block size. You can
120 | # run the sample using the --audio-flush-size flag as well.
121 | python -m audio_helpers --audio-block-size=3200 --audio-flush-size=6400
122 |
123 | See also the `troubleshooting section `_ of the official documentation.
124 |
125 | License
126 | -------
127 |
128 | Copyright (C) 2017 Google Inc.
129 |
130 | Licensed to the Apache Software Foundation (ASF) under one or more contributor
131 | license agreements. See the NOTICE file distributed with this work for
132 | additional information regarding copyright ownership. The ASF licenses this
133 | file to you under the Apache License, Version 2.0 (the "License"); you may not
134 | use this file except in compliance with the License. You may obtain a copy of
135 | the License at
136 |
137 | http://www.apache.org/licenses/LICENSE-2.0
138 |
139 | Unless required by applicable law or agreed to in writing, software
140 | distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
141 | WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
142 | License for the specific language governing permissions and limitations under
143 | the License.
144 |
--------------------------------------------------------------------------------
/sdk/audiofileinput.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Simple file-based sample for the Google Assistant Service."""
16 |
17 | import json
18 | import logging
19 | import os
20 | import os.path
21 | import sys
22 |
23 | import click
24 | import google.auth.transport.grpc
25 | import google.auth.transport.requests
26 | import google.oauth2.credentials
27 |
28 | from google.assistant.embedded.v1alpha2 import (
29 | embedded_assistant_pb2,
30 | embedded_assistant_pb2_grpc
31 | )
32 |
33 |
34 | END_OF_UTTERANCE = embedded_assistant_pb2.AssistResponse.END_OF_UTTERANCE
35 |
36 |
37 | @click.command()
38 | @click.option('--api-endpoint', default='embeddedassistant.googleapis.com',
39 | metavar='', show_default=True,
40 | help='Address of Google Assistant API service.')
41 | @click.option('--credentials',
42 | metavar='', show_default=True,
43 | default=os.path.join(click.get_app_dir('google-oauthlib-tool'),
44 | 'credentials.json'),
45 | help='Path to read OAuth2 credentials.')
46 | @click.option('--device-model-id', required=True,
47 | metavar='',
48 | help='Unique device model identifier.')
49 | @click.option('--device-id', required=True,
50 | metavar='',
51 | help='Unique registered device instance identifier.')
52 | @click.option('--lang', show_default=True,
53 | metavar='',
54 | default='en-US',
55 | help='Language code of the Assistant.')
56 | @click.option('--verbose', '-v', is_flag=True, default=False,
57 | help='Enable verbose logging.')
58 | @click.option('--input-audio-file', '-i', required=True,
59 | metavar='', type=click.File('rb'),
60 | help='Path to input audio file (format: LINEAR16 16000 Hz).')
61 | @click.option('--output-audio-file', '-o', required=True,
62 | metavar='