├── .github
│   └── FUNDING.yml
├── .gitignore
├── LICENSE
├── README.md
├── backend
│   ├── __init__.py
│   ├── demo_web_app.py
│   ├── google_speech_wrapper.py
│   ├── requirements.txt
│   └── settings.py
├── package-lock.json
├── package.json
├── public
│   ├── favicon.ico
│   ├── index.html
│   ├── manifest.json
│   └── robots.txt
└── src
    ├── App.css
    ├── App.js
    ├── App.test.js
    ├── SettingsSection.js
    ├── TranscribeOutput.js
    ├── index.css
    ├── index.js
    ├── serviceWorker.js
    ├── setupTests.js
    └── utility_transcribe.js

--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: saharmor
4 |

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 | /.idea/
131 | node_modules

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Sahar
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [header image: giant microphone]
2 |
3 | # Real-time Transcription Playground
4 |
5 |
6 | A real-time transcription project using React and a Socket.IO Python server. The goal of this project is to enable developers to create web demos and speech-to-text prototypes with just a few lines of code. Examples include medical dictation apps, a note-taking CRM for entrepreneurs, etc.
7 |
8 | *Currently only supports real-time transcription using Google Cloud Speech*
9 |
10 | # Demo
11 | https://user-images.githubusercontent.com/6180201/124362454-370e6600-dc35-11eb-8374-77da5aec25b2.mp4
12 |
13 |
14 | # Installation
15 | * Python 3 [instructions](https://realpython.com/installing-python/)
16 | * `yarn` [instructions](https://classic.yarnpkg.com/en/docs/install/#mac-stable)
17 |
18 | ## Google Speech API
19 | The code assumes an environment variable `GOOGLE_SERVICE_JSON_FILE` that points to a valid GCP service account file.
20 |
21 | If you need to get a service account:
22 | - Within your Google Cloud console, create or select a project
23 | - Enable the Cloud Speech API for that project
24 | - Create a service account
25 | - Download a private key as JSON
26 |
27 | More info in Google Cloud's docs [here](https://cloud.google.com/speech-to-text/docs/quickstart-client-libraries#before-you-begin) and [here](https://codelabs.developers.google.com/codelabs/cloud-speech-text-python3#0).
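
To sanity-check your credentials before running the app, here is a minimal sketch that loads the same service-account file the backend will use (it mirrors how `backend/settings.py` and `backend/google_speech_wrapper.py` load the key):

```python
import os

from google.cloud import speech

# Load the service-account file from the same env var the backend reads
# (see backend/settings.py and backend/google_speech_wrapper.py).
json_path = os.environ['GOOGLE_SERVICE_JSON_FILE']
client = speech.SpeechClient.from_service_account_json(json_path)
print(f'Speech client created from {json_path} - credentials look valid')
```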
28 |
29 | Then, set the environment variable `GOOGLE_SERVICE_JSON_FILE` to the path of the JSON file containing your service account key, e.g. `/users/sahar/documents/sample-project-3c1a5892b00e.json`. Further details can be found in this [Medium article](https://medium.com/geekculture/how-to-build-a-full-stack-transcription-app-with-google-cloud-react-and-python-2dfdcb5e556f).
30 |
31 | # Setup
32 | 1. Clone or fork this repository
33 | 2. Create a virtual environment named `venv` inside `backend/`: `python -m venv backend/venv` (the `start-backend` script in `package.json` expects it there)
34 | 3. Activate the virtual environment: `source backend/venv/bin/activate` (for macOS, Unix, or Linux users) or `backend\venv\Scripts\activate` (for Windows users)
35 | 4. Install requirements: `pip install -r backend/requirements.txt`
36 | 5. Set your environment variable `GOOGLE_SERVICE_JSON_FILE` to point to your file path
37 | 6. Run `yarn install` in the root directory
38 | 7. Run `yarn start` to start the frontend and `yarn start-backend` to run the backend

--------------------------------------------------------------------------------
/backend/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saharmor/realtime-transcription-playground/5cb73a10acc222599084296f593060e7548667cd/backend/__init__.py

--------------------------------------------------------------------------------
/backend/demo_web_app.py:
--------------------------------------------------------------------------------
1 | import socketio
2 | from aiohttp import web
3 |
4 | from backend.settings import BACKEND_PORT
5 | from google_speech_wrapper import GoogleSpeechWrapper
6 |
7 | app = web.Application()
8 | sio = socketio.AsyncServer(cors_allowed_origins=[])  # explicit allow-list; a wildcard '*' would accept any origin
9 |
10 | # Binds our Socket.IO server to our web app instance
11 | sio.attach(app)
12 |
13 |
14 | @sio.on('startGoogleCloudStream')
15 | async def start_google_stream(sid, config):
16 |     print(f'Starting streaming audio data from client {sid}')
17 |     await GoogleSpeechWrapper.start_recognition_stream(sio, sid, config)
18 |
19 |
20 | @sio.on('binaryAudioData')
21 | async def receive_binary_audio_data(sid, message):
22 |     GoogleSpeechWrapper.receive_data(sid, message)
23 |
24 |
25 | @sio.on('endGoogleCloudStream')
26 | async def close_google_stream(sid):
27 |     print(f'Closing streaming data from client {sid}')
28 |     await GoogleSpeechWrapper.stop_recognition_stream(sid)
29 |
30 |
31 | web.run_app(app, port=BACKEND_PORT)
32 |
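
The three events above can be exercised end to end without the React frontend. Below is a minimal sketch of a `python-socketio` test client; it assumes the backend is already running on port 10000 (see `backend/settings.py`), and `test.wav` is a hypothetical 16 kHz mono 16-bit PCM file:

```python
import time
import wave

import socketio  # python-socketio client

sio = socketio.Client()


@sio.on('speechData')
def on_speech_data(response):
    # The server emits {'data': <transcript>, 'isFinal': <bool>}.
    marker = 'final' if response['isFinal'] else 'interim'
    print(f"[{marker}] {response['data']}")


sio.connect('http://localhost:10000', transports=['websocket'])
sio.emit('startGoogleCloudStream', {
    'audio': {'encoding': 'LINEAR16', 'sampleRateHertz': 16000, 'languageCode': 'en-US'},
    'interimResults': True,
})

# Stream the file in ~100 ms chunks (1600 frames at 16 kHz), roughly
# mimicking how the browser streams microphone buffers.
with wave.open('test.wav', 'rb') as wav:  # hypothetical 16 kHz mono 16-bit PCM file
    while True:
        frames = wav.readframes(1600)
        if not frames:
            break
        sio.emit('binaryAudioData', frames)
        time.sleep(0.1)

sio.emit('endGoogleCloudStream')
time.sleep(2)  # give the server a moment to flush the final result
sio.disconnect()
```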

--------------------------------------------------------------------------------
/backend/google_speech_wrapper.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import queue
3 | import sys
4 | import threading
5 | from typing import Dict
6 |
7 | from google.cloud import speech
8 |
9 | from backend.settings import GOOGLE_SERVICE_JSON_FILE
10 |
11 | clients = {}
12 |
13 |
14 | class ClientData:
15 |     def __init__(self, transcribe_thread, conn, config: Dict):
16 |         self._buff = queue.Queue()
17 |         self._thread = transcribe_thread
18 |         self._closed = True
19 |         self._conn = conn
20 |         self.general_config = {dict_key: config[dict_key] for dict_key in config if dict_key != 'audio'}
21 |         self.audio_config = config['audio']
22 |
23 |     async def close(self):
24 |         self._closed = True
25 |         self._buff.put(None)
26 |         self._thread.join()
27 |         await self._conn.emit('endGoogleCloudStream', '')
28 |
29 |     def start_transcribing(self):
30 |         self._closed = False
31 |         self._thread.start()
32 |
33 |     def add_data(self, data):
34 |         self._buff.put(data)
35 |
36 |     def generator(self):
37 |         """
38 |         Code taken and slightly modified from https://github.com/googleapis/python-speech/blob/master/samples/microphone/transcribe_streaming_infinite.py
39 |         """
40 |         while not self._closed:
41 |             # Use a blocking get() to ensure there's at least one chunk of
42 |             # data, and stop iteration if the chunk is None, indicating the
43 |             # end of the audio stream.
44 |             chunk = self._buff.get()
45 |             if chunk is None:
46 |                 return
47 |
48 |             data = [chunk]
49 |
50 |             # Now consume whatever other data's still buffered.
51 |             while True:
52 |                 try:
53 |                     chunk = self._buff.get(block=False)
54 |                     if chunk is None:
55 |                         return
56 |                     data.append(chunk)
57 |                 except queue.Empty:
58 |                     break
59 |
60 |             yield b"".join(data)
61 |
62 |     async def send_client_data(self, data, is_final: bool):
63 |         await self._conn.emit('speechData', {'data': data, 'isFinal': is_final})
64 |
65 |
66 | async def listen_print_loop(responses, client: ClientData):
67 |     """
68 |     Code taken and slightly modified from https://github.com/googleapis/python-speech/blob/master/samples/microphone/transcribe_streaming_infinite.py
69 |     Iterates through server responses and sends them back to the client.
70 |
71 |     The `responses` argument is a generator that will block until a response
72 |     is provided by the server.
73 |
74 |     Each response may contain multiple results, and each result may contain
75 |     multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we
76 |     print only the transcription for the top alternative of the top result.
77 |
78 |     In this case, responses are provided for interim results as well. If the
79 |     response is an interim one, print a line feed at the end of it, to allow
80 |     the next result to overwrite it, until the response is a final one. For the
81 |     final one, print a newline to preserve the finalized transcription.
82 |     """
83 |     num_chars_printed = 0
84 |     interim_flush_counter = 0
85 |     for response in responses:
86 |         if not response.results:
87 |             continue
88 |
89 |         # The `results` list is consecutive. For streaming, we only care about
90 |         # the first result being considered, since once it's `is_final`, it
91 |         # moves on to considering the next utterance.
92 |         result = response.results[0]
93 |         if not result.alternatives:
94 |             continue
95 |
96 |         # Display the transcription of the top alternative.
97 |         transcript = result.alternatives[0].transcript
98 |
99 |         # Display interim results, but with a carriage return at the end of the
100 |         # line, so subsequent lines will overwrite them. If the previous result was
101 |         # longer than this one, we need to print some extra spaces to overwrite the previous result.
102 |         overwrite_chars = " " * (num_chars_printed - len(transcript))
103 |
104 |         if not result.is_final:
105 |             sys.stdout.write(transcript + overwrite_chars + "\r")
106 |             sys.stdout.flush()
107 |             interim_flush_counter += 1
108 |
109 |             if client and interim_flush_counter % 3 == 0:
110 |                 interim_flush_counter = 0
111 |                 await client.send_client_data(transcript + overwrite_chars + "\r", False)
112 |
113 |             num_chars_printed = len(transcript)
114 |         else:
115 |             text = transcript + overwrite_chars
116 |             print(text)
117 |
118 |             if client:
119 |                 await client.send_client_data(text, True)
120 |
121 |             num_chars_printed = 0
122 |
123 |
124 | class GoogleSpeechWrapper:
125 |     encoding_map = {'LINEAR16': speech.RecognitionConfig.AudioEncoding.LINEAR16}
126 |
127 |     @staticmethod
128 |     async def start_listen(client_id: str):
129 |         client = clients[client_id]
130 |         speech_client = speech.SpeechClient.from_service_account_json(GOOGLE_SERVICE_JSON_FILE)
131 |         config = speech.RecognitionConfig(encoding=GoogleSpeechWrapper.encoding_map[client.audio_config['encoding']], sample_rate_hertz=client.audio_config['sampleRateHertz'],
132 |                                           language_code=client.audio_config['languageCode'], enable_automatic_punctuation=True)
133 |         streaming_config = speech.StreamingRecognitionConfig(config=config, interim_results=client.general_config['interimResults'])
134 |
135 |         audio_generator = client.generator()
136 |         requests = (speech.StreamingRecognizeRequest(audio_content=content) for content in audio_generator)
137 |         responses = speech_client.streaming_recognize(streaming_config, requests)
138 |         await listen_print_loop(responses, client)
139 |
140 |         # In case of ERROR
141 |         # client.emit('googleCloudStreamError', err);
142 |         # client._conn.emit('endGoogleCloudStream', '')
143 |
144 |     @staticmethod
145 |     async def start_recognition_stream(sio, client_id: str, config: Dict):
146 |         if client_id not in clients:
147 |             # Run the blocking Google streaming call on its own thread; asyncio.run
148 |             # gives that thread a private event loop so the server loop stays free.
149 |             clients[client_id] = ClientData(threading.Thread(target=asyncio.run, args=(GoogleSpeechWrapper.start_listen(client_id),)), sio, config)
150 |             clients[client_id].start_transcribing()
151 |         else:
152 |             print('Warning - already running transcription for client')
153 |
154 |     @staticmethod
155 |     async def stop_recognition_stream(client_id: str):
156 |         if client_id in clients:
157 |             await clients[client_id].close()
158 |             del clients[client_id]
159 |
160 |     @staticmethod
161 |     def receive_data(client_id: str, data):
162 |         if client_id not in clients:
163 |             return
164 |
165 |         clients[client_id].add_data(data)
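
For reference, the shape of the `config` dict that `ClientData` and `start_listen` consume — it mirrors `getTranscriptionConfig()` in `src/App.js`; the values shown are illustrative:

```python
# Shape of the `config` payload ClientData receives from the frontend.
config = {
    'audio': {
        'encoding': 'LINEAR16',     # must be a key in GoogleSpeechWrapper.encoding_map
        'sampleRateHertz': 16000,
        'languageCode': 'en-US',
    },
    'interimResults': True,         # every key except 'audio' lands in general_config
}
```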
If the previous result was 101 | # longer than this one, we need to print some extra spaces to overwrite the previous result 102 | overwrite_chars = " " * (num_chars_printed - len(transcript)) 103 | 104 | if not result.is_final: 105 | sys.stdout.write(transcript + overwrite_chars + "\r") 106 | sys.stdout.flush() 107 | interim_flush_counter += 1 108 | 109 | if client and interim_flush_counter % 3 == 0: 110 | interim_flush_counter = 0 111 | await client.send_client_data(transcript + overwrite_chars + "\r", False) 112 | 113 | num_chars_printed = len(transcript) 114 | else: 115 | text = transcript + overwrite_chars 116 | print(text) 117 | 118 | if client: 119 | await client.send_client_data(text, True) 120 | 121 | num_chars_printed = 0 122 | 123 | 124 | class GoogleSpeechWrapper: 125 | encoding_map = {'LINEAR16': speech.RecognitionConfig.AudioEncoding.LINEAR16} 126 | 127 | @staticmethod 128 | async def start_listen(client_id: str): 129 | client = clients[client_id] 130 | speech_client = speech.SpeechClient.from_service_account_json(GOOGLE_SERVICE_JSON_FILE) 131 | config = speech.RecognitionConfig(encoding=GoogleSpeechWrapper.encoding_map[client.audio_config['encoding']], sample_rate_hertz=client.audio_config['sampleRateHertz'], 132 | language_code=client.audio_config['languageCode'], enable_automatic_punctuation=True) 133 | streaming_config = speech.StreamingRecognitionConfig(config=config, interim_results=client.general_config['interimResults']) 134 | 135 | audio_generator = client.generator() 136 | requests = (speech.StreamingRecognizeRequest(audio_content=content) for content in audio_generator) 137 | responses = speech_client.streaming_recognize(streaming_config, requests) 138 | await listen_print_loop(responses, client) 139 | 140 | # In case of ERROR 141 | # client.emit('googleCloudStreamError', err); 142 | # client._conn.emit('endGoogleCloudStream', '') 143 | 144 | @staticmethod 145 | async def start_recognition_stream(sio, client_id: str, config: Dict): 146 | if client_id not in clients: 147 | clients[client_id] = ClientData(threading.Thread(target=asyncio.run, args=(GoogleSpeechWrapper.start_listen(client_id),)), sio, config) 148 | clients[client_id].start_transcribing() 149 | else: 150 | print('Warning - already running transcription for client') 151 | 152 | @staticmethod 153 | async def stop_recognition_stream(client_id: str): 154 | if client_id in clients: 155 | await clients[client_id].close() 156 | del clients[client_id] 157 | 158 | @staticmethod 159 | def receive_data(client_id: str, data): 160 | if client_id not in clients: 161 | return 162 | 163 | clients[client_id].add_data(data) 164 | -------------------------------------------------------------------------------- /backend/requirements.txt: -------------------------------------------------------------------------------- 1 | python-socketio==5.3.0 2 | aiohttp==3.7.4.post0 3 | google-cloud-speech==2.5.0 -------------------------------------------------------------------------------- /backend/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | GOOGLE_SERVICE_JSON_FILE = os.environ['GOOGLE_SERVICE_JSON_FILE'] 4 | BACKEND_PORT = 10000 5 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "realtime-transcribe-playground", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@material-ui/core": "^4.11.4", 7 | 
"@testing-library/jest-dom": "^4.2.4", 8 | "@testing-library/react": "^9.3.2", 9 | "@testing-library/user-event": "^7.1.2", 10 | "axios": "^0.21.1", 11 | "bootstrap": "^4.5.0", 12 | "lodash": "^4.17.19", 13 | "react": "^16.13.1", 14 | "react-bootstrap": "^1.2.2", 15 | "react-dom": "^16.13.1", 16 | "react-scripts": "^3.4.1", 17 | "react-spinners": "^0.10.6", 18 | "socket.io-client": "^4.1.2" 19 | }, 20 | "scripts": { 21 | "start": "react-scripts start", 22 | "start-backend": "cd backend && source venv/bin/activate && python demo_web_app.py", 23 | "build": "react-scripts build", 24 | "test": "react-scripts test", 25 | "eject": "react-scripts eject" 26 | }, 27 | "eslintConfig": { 28 | "extends": "react-app" 29 | }, 30 | "browserslist": { 31 | "production": [ 32 | ">0.2%", 33 | "not dead", 34 | "not op_mini all" 35 | ], 36 | "development": [ 37 | "last 1 chrome version", 38 | "last 1 firefox version", 39 | "last 1 safari version" 40 | ] 41 | }, 42 | "proxy": "http://localhost:5000" 43 | } 44 | -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saharmor/realtime-transcription-playground/5cb73a10acc222599084296f593060e7548667cd/public/favicon.ico -------------------------------------------------------------------------------- /public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 21 | Realtime Transcription Playground 22 | 23 | 24 | 25 |
26 | 34 | 35 | -------------------------------------------------------------------------------- /public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "Transcription Playground", 3 | "name": "Realtime Transcription Playground", 4 | "icons": [], 5 | "start_url": ".", 6 | "display": "standalone", 7 | "theme_color": "#000000", 8 | "background_color": "#ffffff" 9 | } 10 | -------------------------------------------------------------------------------- /public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /src/App.css: -------------------------------------------------------------------------------- 1 | html { 2 | height: 100%; 3 | } 4 | 5 | body { 6 | min-height: 100%; 7 | } 8 | 9 | #root { 10 | height: 100vh; 11 | background-color: #f6f6ef; 12 | display: flex; 13 | flex-direction: row; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/App.js: -------------------------------------------------------------------------------- 1 | import React, {useState} from "react"; 2 | import {Button} from "react-bootstrap"; 3 | import withStyles from "@material-ui/core/styles/withStyles"; 4 | import Typography from "@material-ui/core/Typography"; 5 | import "bootstrap/dist/css/bootstrap.min.css"; 6 | import "./App.css"; 7 | import speechToTextUtils from "./utility_transcribe"; 8 | import TranscribeOutput from "./TranscribeOutput"; 9 | import SettingsSections from "./SettingsSection"; 10 | 11 | const useStyles = () => ({ 12 | root: { 13 | display: 'flex', 14 | flex: '1', 15 | margin: '100px 0px 100px 0px', 16 | alignItems: 'center', 17 | textAlign: 'center', 18 | flexDirection: 'column', 19 | }, 20 | title: { 21 | marginBottom: '20px', 22 | }, 23 | settingsSection: { 24 | marginBottom: '20px', 25 | }, 26 | buttonsSection: { 27 | marginBottom: '40px', 28 | }, 29 | }); 30 | 31 | const App = ({classes}) => { 32 | const [transcribedData, setTranscribedData] = useState([]); 33 | const [interimTranscribedData, setInterimTranscribedData] = useState(''); 34 | const [isRecording, setIsRecording] = useState(false); 35 | const [selectedLanguage, setSelectedLanguage] = useState('en-US'); 36 | 37 | const supportedLanguages = {'en-US': 'English', 'de-DE': 'German', 'fr-FR': 'French', 'es-ES': 'Spanish'} 38 | 39 | function flushInterimData() { 40 | if (interimTranscribedData !== '') { 41 | setInterimTranscribedData('') 42 | setTranscribedData(oldData => [...oldData, interimTranscribedData]) 43 | } 44 | } 45 | 46 | function handleDataReceived(data, isFinal) { 47 | if (isFinal) { 48 | setInterimTranscribedData('') 49 | setTranscribedData(oldData => [...oldData, data]) 50 | } else { 51 | setInterimTranscribedData(data) 52 | } 53 | } 54 | 55 | function getTranscriptionConfig() { 56 | return { 57 | audio: { 58 | encoding: 'LINEAR16', 59 | sampleRateHertz: 16000, 60 | languageCode: selectedLanguage, 61 | }, 62 | interimResults: true 63 | } 64 | } 65 | 66 | function onStart() { 67 | setTranscribedData([]) 68 | setIsRecording(true) 69 | 70 | speechToTextUtils.initRecording( 71 | getTranscriptionConfig(), 72 | handleDataReceived, 73 | (error) => { 74 | console.error('Error when transcribing', error); 75 | setIsRecording(false) 76 | // No further action needed, as stream already closes itself on error 77 
| }); 78 | } 79 | 80 | function onStop() { 81 | setIsRecording(false) 82 | flushInterimData() // A safety net if Google's Speech API doesn't work as expected, i.e. always sends the final result 83 | speechToTextUtils.stopRecording(); 84 | } 85 | 86 | return ( 87 |
88 |
89 | 90 | Your Transcription App 🎤 91 | 92 |
93 |
94 | 96 |
97 |
98 | {!isRecording && } 99 | {isRecording && } 100 |
101 |
102 | 103 |
104 |
105 |     );
106 | }
107 |
108 | export default withStyles(useStyles)(App);

--------------------------------------------------------------------------------
/src/App.test.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { render } from '@testing-library/react';
3 | import App from './App';
4 |
5 | // The CRA boilerplate test looked for a "learn react" link this app never renders.
6 | test('renders the app title', () => {
7 |   const { getByText } = render(<App />);
8 |   const titleElement = getByText(/your transcription app/i);
9 |   expect(titleElement).toBeInTheDocument();
10 | });
11 |

--------------------------------------------------------------------------------
/src/SettingsSection.js:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import Select from '@material-ui/core/Select';
3 | import MenuItem from '@material-ui/core/MenuItem';
4 |
5 |
6 | const SettingsSections = ({possibleLanguages, selectedLanguage, onLanguageChanged}) => {
7 |     function onLangChangedLocal(event) {
8 |         onLanguageChanged(event.target.value)
9 |     }
10 |
11 |     return (
12 |         <Select value={selectedLanguage} onChange={onLangChangedLocal}>
13 |             {Object.keys(possibleLanguages).map(language =>
14 |                 <MenuItem key={language} value={language}>
15 |                     {possibleLanguages[language]}
16 |                 </MenuItem>
17 |             )}
18 |         </Select>
19 |     )
20 | }
21 |
22 | export default SettingsSections;
23 |

--------------------------------------------------------------------------------
/src/TranscribeOutput.js:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import withStyles from "@material-ui/core/styles/withStyles";
3 | import Typography from "@material-ui/core/Typography";
4 |
5 |
6 | const useStyles = () => ({
7 |     root: {
8 |         maxWidth: '800px',
9 |         display: 'flex'
10 |     },
11 |     outputText: {
12 |         marginLeft: '8px',
13 |         color: '#ef395a',
14 |     }
15 | });
16 |
17 | const TranscribeOutput = ({classes, transcribedText, interimTranscribedText}) => {
18 |     if (transcribedText.length === 0 && interimTranscribedText.length === 0) {
19 |         return <Typography variant="body1">...</Typography>;
20 |     }
21 |
22 |     return (
23 |         <div className={classes.root}>
24 |             <Typography variant="body1">{transcribedText}</Typography>
25 |             <Typography variant="body1" className={classes.outputText}>{interimTranscribedText}</Typography>
26 |         </div>
27 |     )
28 | }
29 |
30 | export default withStyles(useStyles)(TranscribeOutput);
31 |

--------------------------------------------------------------------------------
/src/index.css:
--------------------------------------------------------------------------------
1 | body {
2 |   margin: 0;
3 |   font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
4 |     'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
5 |     sans-serif;
6 |   -webkit-font-smoothing: antialiased;
7 |   -moz-osx-font-smoothing: grayscale;
8 | }
9 |
10 | code {
11 |   font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
12 |     monospace;
13 | }
14 |

--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom';
3 | import './index.css';
4 | import App from './App';
5 | import * as serviceWorker from './serviceWorker';
6 |
7 | ReactDOM.render(
8 |     <React.StrictMode>
9 |         <App/>
10 |     </React.StrictMode>,
11 |     document.getElementById('root')
12 | );
13 |
14 | // If you want your app to work offline and load faster, you can change
15 | // unregister() to register() below. Note this comes with some pitfalls.
16 | // Learn more about service workers: https://bit.ly/CRA-PWA
17 | serviceWorker.unregister();
18 |

--------------------------------------------------------------------------------
/src/serviceWorker.js:
--------------------------------------------------------------------------------
1 | // This optional code is used to register a service worker.
2 | // register() is not called by default.
3 |
4 | // This lets the app load faster on subsequent visits in production, and gives
5 | // it offline capabilities. However, it also means that developers (and users)
6 | // will only see deployed updates on subsequent visits to a page, after all the
7 | // existing tabs open on the page have been closed, since previously cached
8 | // resources are updated in the background.
9 |
10 | // To learn more about the benefits of this model and instructions on how to
11 | // opt-in, read https://bit.ly/CRA-PWA
12 |
13 | const isLocalhost = Boolean(
14 |   window.location.hostname === 'localhost' ||
15 |     // [::1] is the IPv6 localhost address.
16 |     window.location.hostname === '[::1]' ||
17 |     // 127.0.0.0/8 are considered localhost for IPv4.
18 |     window.location.hostname.match(
19 |       /^127(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}$/
20 |     )
21 | );
22 |
23 | export function register(config) {
24 |   if (process.env.NODE_ENV === 'production' && 'serviceWorker' in navigator) {
25 |     // The URL constructor is available in all browsers that support SW.
26 |     const publicUrl = new URL(process.env.PUBLIC_URL, window.location.href);
27 |     if (publicUrl.origin !== window.location.origin) {
28 |       // Our service worker won't work if PUBLIC_URL is on a different origin
29 |       // from what our page is served on. This might happen if a CDN is used to
30 |       // serve assets; see https://github.com/facebook/create-react-app/issues/2374
31 |       return;
32 |     }
33 |
34 |     window.addEventListener('load', () => {
35 |       const swUrl = `${process.env.PUBLIC_URL}/service-worker.js`;
36 |
37 |       if (isLocalhost) {
38 |         // This is running on localhost. Let's check if a service worker still exists or not.
39 |         checkValidServiceWorker(swUrl, config);
40 |
41 |         // Add some additional logging to localhost, pointing developers to the
42 |         // service worker/PWA documentation.
43 |         navigator.serviceWorker.ready.then(() => {
44 |           console.log(
45 |             'This web app is being served cache-first by a service ' +
46 |               'worker. To learn more, visit https://bit.ly/CRA-PWA'
47 |           );
48 |         });
49 |       } else {
50 |         // Is not localhost. Just register service worker
51 |         registerValidSW(swUrl, config);
52 |       }
53 |     });
54 |   }
55 | }
56 |
57 | function registerValidSW(swUrl, config) {
58 |   navigator.serviceWorker
59 |     .register(swUrl)
60 |     .then(registration => {
61 |       registration.onupdatefound = () => {
62 |         const installingWorker = registration.installing;
63 |         if (installingWorker == null) {
64 |           return;
65 |         }
66 |         installingWorker.onstatechange = () => {
67 |           if (installingWorker.state === 'installed') {
68 |             if (navigator.serviceWorker.controller) {
69 |               // At this point, the updated precached content has been fetched,
70 |               // but the previous service worker will still serve the older
71 |               // content until all client tabs are closed.
72 |               console.log(
73 |                 'New content is available and will be used when all ' +
74 |                   'tabs for this page are closed. See https://bit.ly/CRA-PWA.'
75 |               );
76 |
77 |               // Execute callback
78 |               if (config && config.onUpdate) {
79 |                 config.onUpdate(registration);
80 |               }
81 |             } else {
82 |               // At this point, everything has been precached.
83 |               // It's the perfect time to display a
84 |               // "Content is cached for offline use." message.
85 |               console.log('Content is cached for offline use.');
86 |
87 |               // Execute callback
88 |               if (config && config.onSuccess) {
89 |                 config.onSuccess(registration);
90 |               }
91 |             }
92 |           }
93 |         };
94 |       };
95 |     })
96 |     .catch(error => {
97 |       console.error('Error during service worker registration:', error);
98 |     });
99 | }
100 |
101 | function checkValidServiceWorker(swUrl, config) {
102 |   // Check if the service worker can be found. If it can't, reload the page.
103 |   fetch(swUrl, {
104 |     headers: { 'Service-Worker': 'script' },
105 |   })
106 |     .then(response => {
107 |       // Ensure service worker exists, and that we really are getting a JS file.
108 |       const contentType = response.headers.get('content-type');
109 |       if (
110 |         response.status === 404 ||
111 |         (contentType != null && contentType.indexOf('javascript') === -1)
112 |       ) {
113 |         // No service worker found. Probably a different app. Reload the page.
114 |         navigator.serviceWorker.ready.then(registration => {
115 |           registration.unregister().then(() => {
116 |             window.location.reload();
117 |           });
118 |         });
119 |       } else {
120 |         // Service worker found. Proceed as normal.
121 |         registerValidSW(swUrl, config);
122 |       }
123 |     })
124 |     .catch(() => {
125 |       console.log(
126 |         'No internet connection found. App is running in offline mode.'
127 |       );
128 |     });
129 | }
130 |
131 | export function unregister() {
132 |   if ('serviceWorker' in navigator) {
133 |     navigator.serviceWorker.ready
134 |       .then(registration => {
135 |         registration.unregister();
136 |       })
137 |       .catch(error => {
138 |         console.error(error.message);
139 |       });
140 |   }
141 | }
142 |

--------------------------------------------------------------------------------
/src/setupTests.js:
--------------------------------------------------------------------------------
1 | // jest-dom adds custom jest matchers for asserting on DOM nodes.
2 | // allows you to do things like:
3 | // expect(element).toHaveTextContent(/react/i)
4 | // learn more: https://github.com/testing-library/jest-dom
5 | import '@testing-library/jest-dom/extend-expect';

--------------------------------------------------------------------------------
/src/utility_transcribe.js:
--------------------------------------------------------------------------------
1 | import io from 'socket.io-client';
2 |
3 | const socket = io.connect("http://0.0.0.0:10000/", {transports: ['websocket']});
4 |
5 | // Stream Audio
6 | let bufferSize = 2048,
7 |     AudioContext,
8 |     context,
9 |     processor,
10 |     input,
11 |     globalStream;
12 |
13 | const mediaConstraints = {
14 |     audio: true,
15 |     video: false
16 | };
17 |
18 | let AudioStreamer = {
19 |     /**
20 |      * @param {object} transcribeConfig Transcription configuration such as language, encoding, etc.
21 |      * @param {function} onData Callback to run on data each time it's received
22 |      * @param {function} onError Callback to run on an error if one is emitted.
23 |      */
24 |     initRecording: function (transcribeConfig, onData, onError) {
25 |         socket.emit('startGoogleCloudStream', {...transcribeConfig});
26 |         AudioContext = window.AudioContext || window.webkitAudioContext;
27 |         context = new AudioContext();
28 |         processor = context.createScriptProcessor(bufferSize, 1, 1);
29 |         processor.connect(context.destination);
30 |         context.resume();
31 |
32 |         const handleSuccess = function (stream) {
33 |             globalStream = stream;
34 |             input = context.createMediaStreamSource(stream);
35 |             input.connect(processor);
36 |
37 |             processor.onaudioprocess = function (e) {
38 |                 microphoneProcess(e);
39 |             };
40 |         };
41 |
42 |         navigator.mediaDevices.getUserMedia(mediaConstraints)
43 |             .then(handleSuccess);
44 |
45 |         if (onData) {
46 |             socket.on('speechData', (response) => {
47 |                 onData(response.data, response.isFinal);
48 |             });
49 |         }
50 |
51 |         socket.on('googleCloudStreamError', (error) => {
52 |             if (onError) {
53 |                 onError('error');
54 |             }
55 |             closeAll();
56 |         });
57 |
58 |         socket.on('endGoogleCloudStream', () => {
59 |             closeAll();
60 |         });
61 |     },
62 |
63 |     stopRecording: function () {
64 |         socket.emit('endGoogleCloudStream');
65 |         closeAll();
66 |     }
67 | }
68 |
69 | export default AudioStreamer;
70 |
71 | // Helper functions
72 | /**
73 |  * Processes microphone data into a data stream
74 |  *
75 |  * @param {object} e Input from the microphone
76 |  */
77 | function microphoneProcess(e) {
78 |     const left = e.inputBuffer.getChannelData(0);
79 |     const left16 = convertFloat32ToInt16(left);
80 |     socket.emit('binaryAudioData', left16);
81 | }
82 |
83 | /**
84 |  * Converts a buffer from float32 to int16 and decimates it 3:1
85 |  * (assumes a 48 kHz AudioContext, yielding the 16 kHz sampleRateHertz
86 |  * the backend requests). Necessary for streaming.
87 |  *
88 |  * @param {object} buffer Buffer being converted
89 |  */
90 | function convertFloat32ToInt16(buffer) {
91 |     let l = buffer.length;
92 |     let buf = new Int16Array(l / 3);
93 |
94 |     while (l--) {
95 |         if (l % 3 === 0) {
96 |             buf[l / 3] = buffer[l] * 0x7FFF; // scale [-1, 1] floats to the int16 range (0xFFFF would overflow and wrap)
97 |         }
98 |     }
99 |     return buf.buffer
100 | }
101 |
102 | /**
103 |  * Stops recording and closes everything down. Runs on error or on stop.
104 |  */
105 | function closeAll() {
106 |     // Clear the listeners (prevents issue if opening and closing repeatedly)
107 |     socket.off('speechData');
108 |     socket.off('googleCloudStreamError');
109 |     let tracks = globalStream ? globalStream.getTracks() : null;
110 |     let track = tracks ? tracks[0] : null;
111 |     if (track) {
112 |         track.stop();
113 |     }
114 |
115 |     if (processor) {
116 |         if (input) {
117 |             try {
118 |                 input.disconnect(processor);
119 |             } catch (error) {
120 |                 console.warn('Attempt to disconnect input failed.')
121 |             }
122 |         }
123 |         processor.disconnect(context.destination);
124 |     }
125 |     if (context) {
126 |         context.close().then(function () {
127 |             input = null;
128 |             processor = null;
129 |             context = null;
130 |             AudioContext = null;
131 |         });
132 |     }
133 | }

--------------------------------------------------------------------------------
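
As a cross-check of the scaling and 3:1 decimation in `convertFloat32ToInt16` above, here is a minimal Python mirror of the same arithmetic — a sketch for sanity-checking, not production-quality resampling:

```python
# Python mirror of convertFloat32ToInt16(): scale float32 samples in [-1, 1]
# to the int16 range and keep every 3rd sample (naive 48 kHz -> 16 kHz decimation).
def convert_float32_to_int16(samples):
    return [int(s * 0x7FFF) for i, s in enumerate(samples) if i % 3 == 0]


print(convert_float32_to_int16([1.0, 0.0, 0.0, -0.5, 0.0, 0.0]))  # [32767, -16383]
```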