├── Dockerfile
├── Procfile
├── README.md
├── asr
    ├── __init__.py
    └── model_map.py
├── cloudbuild.yaml
├── requirements.txt
└── runtime.txt


/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.13-alpine
 2 | RUN apk add --no-cache speex speexdsp speex-dev speexdsp-dev git build-base
 3 | RUN git clone https://github.com/pebble-dev/pyspeex.git && pip install cython setuptools && cd pyspeex && make && python setup.py install && cd .. && rm -rf pyspeex
 4 | RUN apk del --no-cache speex-dev speexdsp-dev git
 5 | COPY requirements.txt /requirements.txt
 6 | RUN pip install -r requirements.txt
 7 | RUN apk del --no-cache build-base
 8 | ADD . /code
 9 | WORKDIR /code
10 | CMD exec gunicorn -k gevent -b 0.0.0.0:$PORT asr:app
11 | 


--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | web: gunicorn -k gevent -b 0.0.0.0:$PORT asr:app
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # rebble-asr
2 | asr.rebble.io: speech recognition for rebble
3 | 


--------------------------------------------------------------------------------
/asr/__init__.py:
--------------------------------------------------------------------------------
  1 | import io
  2 | 
  3 | import gevent.monkey
  4 | gevent.monkey.patch_all()
  5 | from email.mime.multipart import MIMEMultipart
  6 | from email.message import Message
  7 | from .model_map import get_model_for_lang
  8 | import json
  9 | import os
 10 | from speex import SpeexDecoder
 11 | from google.cloud.speech_v2 import SpeechClient
 12 | from google.cloud.speech_v2.types import cloud_speech
 13 | from google.cloud import storage
 14 | import time
 15 | from google.api_core.exceptions import ServiceUnavailable
 16 | import base64
 17 | 
 18 | import grpc.experimental.gevent as grpc_gevent
 19 | grpc_gevent.init_gevent()
 20 | 
 21 | import requests
 22 | from flask import Flask, request, Response, abort
 23 | import logging
 24 | import datetime
 25 | 
 26 | import wave
 27 | 
 28 | logging.basicConfig(level=logging.INFO)
 29 | 
 30 | app = Flask(__name__)
 31 | 
 32 | AUTH_URL = os.environ.get("AUTH_URL", "https://auth.rebble.io")
 33 | 
 34 | speech_client = SpeechClient(
 35 |     client_options={"api_endpoint": "us-central1-speech.googleapis.com"}
 36 | )
 37 | 
 38 | storage_client = storage.Client(project=os.environ.get("GCP_PROJECT", 'pebble-rebirth'))
 39 | bucket = storage_client.bucket(os.environ.get("BUCKET_NAME", "rebble-audio-debug"))
 40 | 
 41 | # We know gunicorn does this, but it doesn't *say* it does this, so we must signal it manually.
 42 | @app.before_request
 43 | def handle_chunking():
 44 |     request.environ['wsgi.input_terminated'] = 1
 45 | 
 46 | 
 47 | 
 48 | def parse_chunks(stream):
 49 |     boundary = b'--' + request.headers['content-type'].split(';')[1].split('=')[1].encode('utf-8').strip()  # super lazy/brittle parsing.
 50 |     this_frame = b''
 51 |     while True:
 52 |         content = stream.read(4096)
 53 |         this_frame += content
 54 |         end = this_frame.find(boundary)
 55 |         if end > -1:
 56 |             frame = this_frame[:end]
 57 |             this_frame = this_frame[end + len(boundary):]
 58 |             if frame != b'':
 59 |                 try:
 60 |                     header, content = frame.split(b'\r\n\r\n', 1)
 61 |                 except ValueError:
 62 |                     continue
 63 |                 yield content[:-2]
 64 |         if content == b'':
 65 |             break
 66 | 
 67 | 
 68 | @app.route('/heartbeat')
 69 | def heartbeat():
 70 |     return 'asr'
 71 | 
 72 | @app.route('/NmspServlet/', methods=["POST"])
 73 | def recognise():
 74 |     stream = request.stream
 75 | 
 76 |     access_token, lang = request.host.split('.', 1)[0].split('-', 1)
 77 | 
 78 |     auth_req = requests.get(f"{AUTH_URL}/api/v1/me/token", headers={'Authorization': f"Bearer {access_token}"})
 79 |     if not auth_req.ok:
 80 |         abort(401)
 81 | 
 82 |     result = auth_req.json()
 83 |     if not result['is_subscribed']:
 84 |         abort(402)
 85 | 
 86 |     user_id = result.get('uid', None)
 87 |     audio_debug_enabled = result.get('audio_debug_mode', False)
 88 |     if user_id is None:
 89 |         audio_debug_enabled = False
 90 | 
 91 |     lang = model_map.get_real_lang(lang)
 92 | 
 93 |     req_start = datetime.datetime.now()
 94 |     logging.info("Received transcription request in language: %s", lang)
 95 |     chunks = iter(list(parse_chunks(stream)))
 96 |     logging.info("Audio received in %s", datetime.datetime.now() - req_start)
 97 |     content = next(chunks).decode('utf-8')
 98 |     logging.info("Metadata: %s", content)
 99 | 
100 |     decode_start = datetime.datetime.now()
101 |     decoder = SpeexDecoder(1)
102 |     pcm = bytearray()
103 |     for chunk in chunks:
104 |         pcm.extend(decoder.decode(chunk))
105 |     logging.info("Decoded speex in %s", datetime.datetime.now() - decode_start)
106 | 
107 |     if audio_debug_enabled:
108 |         upload_start = datetime.datetime.now()
109 |         buffer = io.BytesIO(pcm)
110 |         with wave.open(buffer, 'wb') as wav_file:
111 |             wav_file.setnchannels(1)
112 |             wav_file.setsampwidth(2)
113 |             wav_file.setframerate(16000)
114 |             wav_file.writeframes(pcm)
115 |         buffer.seek(0)
116 |         blob = bucket.blob(f"audio/users/{user_id}/recording-{datetime.datetime.now().isoformat()}.wav")
117 |         blob.upload_from_file(buffer, rewind=True, content_type="audio/wav")
118 |         logging.info("Uploaded audio in %s", datetime.datetime.now() - upload_start)
119 | 
120 |     asr_request_start = datetime.datetime.now()
121 |     config = cloud_speech.RecognitionConfig(
122 |         explicit_decoding_config=cloud_speech.ExplicitDecodingConfig(
123 |             encoding=cloud_speech.ExplicitDecodingConfig.AudioEncoding.LINEAR16,
124 |             sample_rate_hertz=16000,
125 |             audio_channel_count=1,
126 |         ),
127 |         language_codes=[lang],
128 |         features=cloud_speech.RecognitionFeatures(
129 |             profanity_filter=True, # matches current behaviour, but do we really want it?
130 |             enable_word_confidence=True, # Pebble uses (ignores) this
131 |             enable_automatic_punctuation=True,
132 |             enable_spoken_punctuation=True,
133 |             max_alternatives=1,
134 |         ),
135 |         model="chirp_2",
136 |     )
137 | 
138 |     asr_request = cloud_speech.RecognizeRequest(
139 |         recognizer=f"projects/pebble-rebirth/locations/us-central1/recognizers/_",
140 |         config=config,
141 |         content=bytes(pcm),
142 |     )
143 |     attempts = 0
144 |     while True:
145 |         try:
146 |             response = speech_client.recognize(asr_request, timeout=10)
147 |         except ServiceUnavailable as e:
148 |             logging.error("ASR request failed: %s", e)
149 |             attempts += 1
150 |             if attempts > 2:
151 |                 raise
152 |             time.sleep(2)
153 |             continue
154 |         else:
155 |             break
156 |     logging.info("ASR request completed in %s", datetime.datetime.now() - asr_request_start)
157 | 
158 |     if audio_debug_enabled:
159 |         complete_response = ''.join(result.alternatives[0].transcript for result in response.results)
160 |         blob.metadata = {
161 |             'rebble-language': lang,
162 |             'rebble-transcript': base64.b64encode(complete_response.encode('utf-8')).decode('utf-8')
163 |         }
164 |         blob.patch()
165 | 
166 |     words = []
167 |     for result in response.results:
168 |         words.extend({
169 |                          'word': x,
170 |                          'confidence': str(result.alternatives[0].confidence),
171 |                      } for x in result.alternatives[0].transcript.split(' '))
172 | 
173 |     # Now for some reason we also need to give back a mime/multipart message...
174 |     parts = MIMEMultipart()
175 |     response_part = Message()
176 |     response_part.add_header('Content-Type', 'application/JSON; charset=utf-8')
177 | 
178 |     if len(words) > 0:
179 |         response_part.add_header('Content-Disposition', 'form-data; name="QueryResult"')
180 |         words[0]['word'] += '\\*no-space-before'
181 |         words[0]['word'] = words[0]['word'][0].upper() + words[0]['word'][1:]
182 |         response_part.set_payload(json.dumps({
183 |             'words': [words],
184 |         }))
185 |     else:
186 |         response_part.add_header('Content-Disposition', 'form-data; name="QueryRetry"')
187 |         # Other errors probably exist, but I don't know what they are.
188 |         # This is a Nuance error verbatim.
189 |         response_part.set_payload(json.dumps({
190 |             "Cause": 1,
191 |             "Name": "AUDIO_INFO",
192 |             "Prompt": "Sorry, speech not recognized. Please try again."
193 |         }))
194 |     parts.attach(response_part)
195 | 
196 |     parts.set_boundary('--Nuance_NMSP_vutc5w1XobDdefsYG3wq')
197 | 
198 |     response = Response('\r\n' + parts.as_string().split("\n", 3)[3].replace('\n', '\r\n'))
199 |     response.headers['Content-Type'] = f'multipart/form-data; boundary={parts.get_boundary()}'
200 |     logging.info("Request complete in %s", datetime.datetime.now() - req_start)
201 |     return response
202 | 


--------------------------------------------------------------------------------
/asr/model_map.py:
--------------------------------------------------------------------------------
 1 | MODEL_MAP = {
 2 |     'af-za': 'chirp_2',
 3 |     'cs-cz': 'chirp_2',
 4 |     'da-dk': 'chirp_2',
 5 |     'de-de': 'chirp_2',
 6 |     'en-au': 'chirp_2',
 7 |     'en-us': 'chirp_2',
 8 |     'en-gb': 'chirp_2',
 9 |     'en-in': 'chirp_2',
10 |     'fi-fi': 'chirp_2',
11 |     'fil-ph': 'chirp_2',
12 |     'fr-ca': 'chirp_2',
13 |     'fr-fr': 'chirp_2',
14 |     'gl-es': 'chirp_2',
15 |     'id-id': 'chirp_2',
16 |     'is-is': 'chirp_2',
17 |     'it-it': 'chirp_2',
18 |     'ko-kr': 'chirp_2',
19 |     'lv-lv': 'chirp_2',
20 |     'lt-lt': 'chirp_2',
21 |     'hr-hr': 'chirp_2',
22 |     'hu-hu': 'chirp_2',
23 |     'ms-my': 'chirp_2',
24 |     'nl-nl': 'chirp_2',
25 |     'no-no': 'chirp_2',
26 |     'pt-pt': 'chirp_2',
27 |     'pl-pl': 'chirp_2',
28 |     'ro-ro': 'chirp_2',
29 |     'ru-ru': 'chirp_2',
30 |     'uk-ua': 'chirp_2',
31 |     'es-es': 'chirp_2',
32 |     'es-us': 'chirp_2',
33 |     'sk-sk': 'chirp_2',
34 |     'sl-si': 'chirp_2',
35 |     'sv-se': 'chirp_2',
36 |     'sw-ke': 'chirp_2',
37 |     'tr-tr': 'chirp_2',
38 |     'zu-za': 'chirp_2',
39 | }
40 | 
41 | LANGUAGE_OVERRIDES = {
42 |     'en-ca': 'en-us', # Cloud Speech V2 dropped en-ca support. chirp_2 is universal, so this is probably close enough.
43 |     'es-mx': 'es-us', # also dropped es-mx, apparently
44 |     'sw-tz': 'sw-ke', # also dropped sw-tz. I don't know enough to know whether this makes sense, to be honest.
45 |     'nb-no': 'no-no', # I'm still pretty sure this one was a typo.
46 |     'auto-auto': 'auto', # this is a special case for the auto-detect language code
47 | }
48 | 
49 | 
50 | def get_model_for_lang(code: str) -> str:
51 |     return MODEL_MAP.get(code.lower(), 'chirp_2')
52 | 
53 | def get_real_lang(code: str) -> str:
54 |     return LANGUAGE_OVERRIDES.get(code.lower(), code.lower())
55 | 


--------------------------------------------------------------------------------
/cloudbuild.yaml:
--------------------------------------------------------------------------------
 1 | steps:
 2 | - name: 'gcr.io/cloud-builders/docker'
 3 |   args:
 4 |   - build
 5 |   - "--tag=gcr.io/pebble-rebirth/asr:g$SHORT_SHA"
 6 |   - "--file=./Dockerfile"
 7 |   - .
 8 | images:
 9 | - "gcr.io/pebble-rebirth/asr:g$SHORT_SHA"
10 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | blinker==1.9.0
 2 | cachetools==5.5.2
 3 | certifi==2025.1.31
 4 | chardet==5.2.0
 5 | charset-normalizer==3.4.1
 6 | click==8.1.8
 7 | Cython==3.0.12
 8 | Flask==3.1.0
 9 | gevent==24.11.1
10 | google-api-core==2.24.2
11 | google-auth==2.38.0
12 | google-cloud-speech==2.31.1
13 | google-cloud-storage==3.1.0
14 | googleapis-common-protos==1.69.2
15 | greenlet==3.1.1
16 | grpcio==1.71.0
17 | grpcio-status==1.71.0
18 | gunicorn==23.0.0
19 | idna==3.10
20 | itsdangerous==2.2.0
21 | Jinja2==3.1.6
22 | MarkupSafe==3.0.2
23 | packaging==24.2
24 | proto-plus==1.26.1
25 | protobuf==5.29.4
26 | pyasn1==0.6.1
27 | pyasn1_modules==0.4.1
28 | requests==2.32.3
29 | rsa==4.9
30 | setuptools==77.0.3
31 | # speex==0.9.1
32 | urllib3==2.3.0
33 | Werkzeug==3.1.3
34 | zope.event==5.0
35 | zope.interface==7.2
36 | 


--------------------------------------------------------------------------------
/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.6.6
2 | 


--------------------------------------------------------------------------------