├── .github
│   └── FUNDING.yml
├── .gitignore
├── LICENSE
├── README.md
├── backend
│   ├── __init__.py
│   ├── demo_web_app.py
│   ├── google_speech_wrapper.py
│   ├── requirements.txt
│   └── settings.py
├── package-lock.json
├── package.json
├── public
│   ├── favicon.ico
│   ├── index.html
│   ├── manifest.json
│   └── robots.txt
└── src
    ├── App.css
    ├── App.js
    ├── App.test.js
    ├── SettingsSection.js
    ├── TranscribeOutput.js
    ├── index.css
    ├── index.js
    ├── serviceWorker.js
    ├── setupTests.js
    └── utility_transcribe.js

--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: saharmor
4 |

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 | /.idea/
131 | node_modules

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Sahar
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [header image: giant microphone]
2 |
3 | # Real-time Transcription Playground
4 |
5 |
6 | A real-time transcription project using React and a Socket.IO Python server. The goal of this project is to enable developers to create web demos and speech-to-text prototypes with just a few lines of code. Examples include medical dictation apps, a note-taking CRM for entrepreneurs, etc.
7 |
8 | *Currently only supports real-time transcription using Google Cloud Speech*
9 |
10 | # Demo
11 | https://user-images.githubusercontent.com/6180201/124362454-370e6600-dc35-11eb-8374-77da5aec25b2.mp4
12 |
13 |
14 | # Installation
15 | * Python 3 [instructions](https://realpython.com/installing-python/)
16 | * `yarn` [instructions](https://classic.yarnpkg.com/en/docs/install/#mac-stable)
17 |
18 | ## Google Speech API
19 | The code assumes an environment variable `GOOGLE_SERVICE_JSON_FILE` that points to a valid GCP service account file.
20 |
21 | If you need to get a service account:
22 | - Within your Google Cloud console, create or select a project
23 | - Enable the Cloud Speech API for that project
24 | - Create a service account
25 | - Download a private key as JSON
26 |
27 | More info in Google Cloud's docs [here](https://cloud.google.com/speech-to-text/docs/quickstart-client-libraries#before-you-begin) and [here](https://codelabs.developers.google.com/codelabs/cloud-speech-text-python3#0).
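
To sanity-check your credentials before running the app, here is a minimal sketch that loads the same service-account file the backend will use (it mirrors how `backend/settings.py` and `backend/google_speech_wrapper.py` load the key):

```python
import os

from google.cloud import speech

# Load the service-account file from the same env var the backend reads
# (see backend/settings.py and backend/google_speech_wrapper.py).
json_path = os.environ['GOOGLE_SERVICE_JSON_FILE']
client = speech.SpeechClient.from_service_account_json(json_path)
print(f'Speech client created from {json_path} - credentials look valid')
```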
28 |
29 | Then, set the environment variable `GOOGLE_SERVICE_JSON_FILE` to the path of the JSON file containing your service account key, e.g. `/users/sahar/documents/sample-project-3c1a5892b00e.json`. Further details can be found in this [Medium article](https://medium.com/geekculture/how-to-build-a-full-stack-transcription-app-with-google-cloud-react-and-python-2dfdcb5e556f).
30 |
31 | # Setup
32 | 1. Clone or fork this repository
33 | 2. Create a virtual environment named `venv` inside `backend/`: `python -m venv backend/venv` (the `start-backend` script in `package.json` expects it there)
34 | 3. Activate the virtual environment: `source backend/venv/bin/activate` (for macOS, Unix, or Linux users) or `backend\venv\Scripts\activate` (for Windows users)
35 | 4. Install requirements: `pip install -r backend/requirements.txt`
36 | 5. Set your environment variable `GOOGLE_SERVICE_JSON_FILE` to point to your file path
37 | 6. Run `yarn install` in the root directory
38 | 7. Run `yarn start` to start the frontend and `yarn start-backend` to run the backend

--------------------------------------------------------------------------------
/backend/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saharmor/realtime-transcription-playground/5cb73a10acc222599084296f593060e7548667cd/backend/__init__.py

--------------------------------------------------------------------------------
/backend/demo_web_app.py:
--------------------------------------------------------------------------------
1 | import socketio
2 | from aiohttp import web
3 |
4 | from backend.settings import BACKEND_PORT
5 | from google_speech_wrapper import GoogleSpeechWrapper
6 |
7 | app = web.Application()
8 | sio = socketio.AsyncServer(cors_allowed_origins=[])  # explicit allow-list; a wildcard '*' would accept any origin
9 |
10 | # Binds our Socket.IO server to our web app instance
11 | sio.attach(app)
12 |
13 |
14 | @sio.on('startGoogleCloudStream')
15 | async def start_google_stream(sid, config):
16 |     print(f'Starting streaming audio data from client {sid}')
17 |     await GoogleSpeechWrapper.start_recognition_stream(sio, sid, config)
18 |
19 |
20 | @sio.on('binaryAudioData')
21 | async def receive_binary_audio_data(sid, message):
22 |     GoogleSpeechWrapper.receive_data(sid, message)
23 |
24 |
25 | @sio.on('endGoogleCloudStream')
26 | async def close_google_stream(sid):
27 |     print(f'Closing streaming data from client {sid}')
28 |     await GoogleSpeechWrapper.stop_recognition_stream(sid)
29 |
30 |
31 | web.run_app(app, port=BACKEND_PORT)
32 |
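
The three events above can be exercised end to end without the React frontend. Below is a minimal sketch of a `python-socketio` test client; it assumes the backend is already running on port 10000 (see `backend/settings.py`), and `test.wav` is a hypothetical 16 kHz mono 16-bit PCM file:

```python
import time
import wave

import socketio  # python-socketio client

sio = socketio.Client()


@sio.on('speechData')
def on_speech_data(response):
    # The server emits {'data': <transcript>, 'isFinal': <bool>}.
    marker = 'final' if response['isFinal'] else 'interim'
    print(f"[{marker}] {response['data']}")


sio.connect('http://localhost:10000', transports=['websocket'])
sio.emit('startGoogleCloudStream', {
    'audio': {'encoding': 'LINEAR16', 'sampleRateHertz': 16000, 'languageCode': 'en-US'},
    'interimResults': True,
})

# Stream the file in ~100 ms chunks (1600 frames at 16 kHz), roughly
# mimicking how the browser streams microphone buffers.
with wave.open('test.wav', 'rb') as wav:  # hypothetical 16 kHz mono 16-bit PCM file
    while True:
        frames = wav.readframes(1600)
        if not frames:
            break
        sio.emit('binaryAudioData', frames)
        time.sleep(0.1)

sio.emit('endGoogleCloudStream')
time.sleep(2)  # give the server a moment to flush the final result
sio.disconnect()
```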

--------------------------------------------------------------------------------
/backend/google_speech_wrapper.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import queue
3 | import sys
4 | import threading
5 | from typing import Dict
6 |
7 | from google.cloud import speech
8 |
9 | from backend.settings import GOOGLE_SERVICE_JSON_FILE
10 |
11 | clients = {}
12 |
13 |
14 | class ClientData:
15 |     def __init__(self, transcribe_thread, conn, config: Dict):
16 |         self._buff = queue.Queue()
17 |         self._thread = transcribe_thread
18 |         self._closed = True
19 |         self._conn = conn
20 |         self.general_config = {dict_key: config[dict_key] for dict_key in config if dict_key != 'audio'}
21 |         self.audio_config = config['audio']
22 |
23 |     async def close(self):
24 |         self._closed = True
25 |         self._buff.put(None)
26 |         self._thread.join()
27 |         await self._conn.emit('endGoogleCloudStream', '')
28 |
29 |     def start_transcribing(self):
30 |         self._closed = False
31 |         self._thread.start()
32 |
33 |     def add_data(self, data):
34 |         self._buff.put(data)
35 |
36 |     def generator(self):
37 |         """
38 |         Code taken and slightly modified from https://github.com/googleapis/python-speech/blob/master/samples/microphone/transcribe_streaming_infinite.py
39 |         """
40 |         while not self._closed:
41 |             # Use a blocking get() to ensure there's at least one chunk of
42 |             # data, and stop iteration if the chunk is None, indicating the
43 |             # end of the audio stream.
44 |             chunk = self._buff.get()
45 |             if chunk is None:
46 |                 return
47 |
48 |             data = [chunk]
49 |
50 |             # Now consume whatever other data's still buffered.
51 |             while True:
52 |                 try:
53 |                     chunk = self._buff.get(block=False)
54 |                     if chunk is None:
55 |                         return
56 |                     data.append(chunk)
57 |                 except queue.Empty:
58 |                     break
59 |
60 |             yield b"".join(data)
61 |
62 |     async def send_client_data(self, data, is_final: bool):
63 |         await self._conn.emit('speechData', {'data': data, 'isFinal': is_final})
64 |
65 |
66 | async def listen_print_loop(responses, client: ClientData):
67 |     """
68 |     Code taken and slightly modified from https://github.com/googleapis/python-speech/blob/master/samples/microphone/transcribe_streaming_infinite.py
69 |     Iterates through server responses and sends them back to the client.
70 |
71 |     The `responses` argument is a generator that will block until a response
72 |     is provided by the server.
73 |
74 |     Each response may contain multiple results, and each result may contain
75 |     multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we
76 |     print only the transcription for the top alternative of the top result.
77 |
78 |     In this case, responses are provided for interim results as well. If the
79 |     response is an interim one, print a line feed at the end of it, to allow
80 |     the next result to overwrite it, until the response is a final one. For the
81 |     final one, print a newline to preserve the finalized transcription.
82 |     """
83 |     num_chars_printed = 0
84 |     interim_flush_counter = 0
85 |     for response in responses:
86 |         if not response.results:
87 |             continue
88 |
89 |         # The `results` list is consecutive. For streaming, we only care about
90 |         # the first result being considered, since once it's `is_final`, it
91 |         # moves on to considering the next utterance.
92 |         result = response.results[0]
93 |         if not result.alternatives:
94 |             continue
95 |
96 |         # Display the transcription of the top alternative.
97 |         transcript = result.alternatives[0].transcript
98 |
99 |         # Display interim results, but with a carriage return at the end of the
100 |         # line, so subsequent lines will overwrite them. If the previous result was
101 |         # longer than this one, we need to print some extra spaces to overwrite the previous result.
102 |         overwrite_chars = " " * (num_chars_printed - len(transcript))
103 |
104 |         if not result.is_final:
105 |             sys.stdout.write(transcript + overwrite_chars + "\r")
106 |             sys.stdout.flush()
107 |             interim_flush_counter += 1
108 |
109 |             if client and interim_flush_counter % 3 == 0:
110 |                 interim_flush_counter = 0
111 |                 await client.send_client_data(transcript + overwrite_chars + "\r", False)
112 |
113 |             num_chars_printed = len(transcript)
114 |         else:
115 |             text = transcript + overwrite_chars
116 |             print(text)
117 |
118 |             if client:
119 |                 await client.send_client_data(text, True)
120 |
121 |             num_chars_printed = 0
122 |
123 |
124 | class GoogleSpeechWrapper:
125 |     encoding_map = {'LINEAR16': speech.RecognitionConfig.AudioEncoding.LINEAR16}
126 |
127 |     @staticmethod
128 |     async def start_listen(client_id: str):
129 |         client = clients[client_id]
130 |         speech_client = speech.SpeechClient.from_service_account_json(GOOGLE_SERVICE_JSON_FILE)
131 |         config = speech.RecognitionConfig(encoding=GoogleSpeechWrapper.encoding_map[client.audio_config['encoding']], sample_rate_hertz=client.audio_config['sampleRateHertz'],
132 |                                           language_code=client.audio_config['languageCode'], enable_automatic_punctuation=True)
133 |         streaming_config = speech.StreamingRecognitionConfig(config=config, interim_results=client.general_config['interimResults'])
134 |
135 |         audio_generator = client.generator()
136 |         requests = (speech.StreamingRecognizeRequest(audio_content=content) for content in audio_generator)
137 |         responses = speech_client.streaming_recognize(streaming_config, requests)
138 |         await listen_print_loop(responses, client)
139 |
140 |         # In case of ERROR
141 |         # client.emit('googleCloudStreamError', err);
142 |         # client._conn.emit('endGoogleCloudStream', '')
143 |
144 |     @staticmethod
145 |     async def start_recognition_stream(sio, client_id: str, config: Dict):
146 |         if client_id not in clients:
147 |             # Run the blocking Google streaming call on its own thread; asyncio.run
148 |             # gives that thread a private event loop so the server loop stays free.
149 |             clients[client_id] = ClientData(threading.Thread(target=asyncio.run, args=(GoogleSpeechWrapper.start_listen(client_id),)), sio, config)
150 |             clients[client_id].start_transcribing()
151 |         else:
152 |             print('Warning - already running transcription for client')
153 |
154 |     @staticmethod
155 |     async def stop_recognition_stream(client_id: str):
156 |         if client_id in clients:
157 |             await clients[client_id].close()
158 |             del clients[client_id]
159 |
160 |     @staticmethod
161 |     def receive_data(client_id: str, data):
162 |         if client_id not in clients:
163 |             return
164 |
165 |         clients[client_id].add_data(data)
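
For reference, the shape of the `config` dict that `ClientData` and `start_listen` consume — it mirrors `getTranscriptionConfig()` in `src/App.js`; the values shown are illustrative:

```python
# Shape of the `config` payload ClientData receives from the frontend.
config = {
    'audio': {
        'encoding': 'LINEAR16',     # must be a key in GoogleSpeechWrapper.encoding_map
        'sampleRateHertz': 16000,
        'languageCode': 'en-US',
    },
    'interimResults': True,         # every key except 'audio' lands in general_config
}
```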
If the previous result was 101 | # longer than this one, we need to print some extra spaces to overwrite the previous result 102 | overwrite_chars = " " * (num_chars_printed - len(transcript)) 103 | 104 | if not result.is_final: 105 | sys.stdout.write(transcript + overwrite_chars + "\r") 106 | sys.stdout.flush() 107 | interim_flush_counter += 1 108 | 109 | if client and interim_flush_counter % 3 == 0: 110 | interim_flush_counter = 0 111 | await client.send_client_data(transcript + overwrite_chars + "\r", False) 112 | 113 | num_chars_printed = len(transcript) 114 | else: 115 | text = transcript + overwrite_chars 116 | print(text) 117 | 118 | if client: 119 | await client.send_client_data(text, True) 120 | 121 | num_chars_printed = 0 122 | 123 | 124 | class GoogleSpeechWrapper: 125 | encoding_map = {'LINEAR16': speech.RecognitionConfig.AudioEncoding.LINEAR16} 126 | 127 | @staticmethod 128 | async def start_listen(client_id: str): 129 | client = clients[client_id] 130 | speech_client = speech.SpeechClient.from_service_account_json(GOOGLE_SERVICE_JSON_FILE) 131 | config = speech.RecognitionConfig(encoding=GoogleSpeechWrapper.encoding_map[client.audio_config['encoding']], sample_rate_hertz=client.audio_config['sampleRateHertz'], 132 | language_code=client.audio_config['languageCode'], enable_automatic_punctuation=True) 133 | streaming_config = speech.StreamingRecognitionConfig(config=config, interim_results=client.general_config['interimResults']) 134 | 135 | audio_generator = client.generator() 136 | requests = (speech.StreamingRecognizeRequest(audio_content=content) for content in audio_generator) 137 | responses = speech_client.streaming_recognize(streaming_config, requests) 138 | await listen_print_loop(responses, client) 139 | 140 | # In case of ERROR 141 | # client.emit('googleCloudStreamError', err); 142 | # client._conn.emit('endGoogleCloudStream', '') 143 | 144 | @staticmethod 145 | async def start_recognition_stream(sio, client_id: str, config: Dict): 146 | if client_id not in clients: 147 | clients[client_id] = ClientData(threading.Thread(target=asyncio.run, args=(GoogleSpeechWrapper.start_listen(client_id),)), sio, config) 148 | clients[client_id].start_transcribing() 149 | else: 150 | print('Warning - already running transcription for client') 151 | 152 | @staticmethod 153 | async def stop_recognition_stream(client_id: str): 154 | if client_id in clients: 155 | await clients[client_id].close() 156 | del clients[client_id] 157 | 158 | @staticmethod 159 | def receive_data(client_id: str, data): 160 | if client_id not in clients: 161 | return 162 | 163 | clients[client_id].add_data(data) 164 | -------------------------------------------------------------------------------- /backend/requirements.txt: -------------------------------------------------------------------------------- 1 | python-socketio==5.3.0 2 | aiohttp==3.7.4.post0 3 | google-cloud-speech==2.5.0 -------------------------------------------------------------------------------- /backend/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | GOOGLE_SERVICE_JSON_FILE = os.environ['GOOGLE_SERVICE_JSON_FILE'] 4 | BACKEND_PORT = 10000 5 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "realtime-transcribe-playground", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@material-ui/core": "^4.11.4", 7 | 
"@testing-library/jest-dom": "^4.2.4", 8 | "@testing-library/react": "^9.3.2", 9 | "@testing-library/user-event": "^7.1.2", 10 | "axios": "^0.21.1", 11 | "bootstrap": "^4.5.0", 12 | "lodash": "^4.17.19", 13 | "react": "^16.13.1", 14 | "react-bootstrap": "^1.2.2", 15 | "react-dom": "^16.13.1", 16 | "react-scripts": "^3.4.1", 17 | "react-spinners": "^0.10.6", 18 | "socket.io-client": "^4.1.2" 19 | }, 20 | "scripts": { 21 | "start": "react-scripts start", 22 | "start-backend": "cd backend && source venv/bin/activate && python demo_web_app.py", 23 | "build": "react-scripts build", 24 | "test": "react-scripts test", 25 | "eject": "react-scripts eject" 26 | }, 27 | "eslintConfig": { 28 | "extends": "react-app" 29 | }, 30 | "browserslist": { 31 | "production": [ 32 | ">0.2%", 33 | "not dead", 34 | "not op_mini all" 35 | ], 36 | "development": [ 37 | "last 1 chrome version", 38 | "last 1 firefox version", 39 | "last 1 safari version" 40 | ] 41 | }, 42 | "proxy": "http://localhost:5000" 43 | } 44 | -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saharmor/realtime-transcription-playground/5cb73a10acc222599084296f593060e7548667cd/public/favicon.ico -------------------------------------------------------------------------------- /public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 21 | Realtime Transcription Playground 22 | 23 | 24 | 25 |
26 | 34 | 35 | -------------------------------------------------------------------------------- /public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "Transcription Playground", 3 | "name": "Realtime Transcription Playground", 4 | "icons": [], 5 | "start_url": ".", 6 | "display": "standalone", 7 | "theme_color": "#000000", 8 | "background_color": "#ffffff" 9 | } 10 | -------------------------------------------------------------------------------- /public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /src/App.css: -------------------------------------------------------------------------------- 1 | html { 2 | height: 100%; 3 | } 4 | 5 | body { 6 | min-height: 100%; 7 | } 8 | 9 | #root { 10 | height: 100vh; 11 | background-color: #f6f6ef; 12 | display: flex; 13 | flex-direction: row; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/App.js: -------------------------------------------------------------------------------- 1 | import React, {useState} from "react"; 2 | import {Button} from "react-bootstrap"; 3 | import withStyles from "@material-ui/core/styles/withStyles"; 4 | import Typography from "@material-ui/core/Typography"; 5 | import "bootstrap/dist/css/bootstrap.min.css"; 6 | import "./App.css"; 7 | import speechToTextUtils from "./utility_transcribe"; 8 | import TranscribeOutput from "./TranscribeOutput"; 9 | import SettingsSections from "./SettingsSection"; 10 | 11 | const useStyles = () => ({ 12 | root: { 13 | display: 'flex', 14 | flex: '1', 15 | margin: '100px 0px 100px 0px', 16 | alignItems: 'center', 17 | textAlign: 'center', 18 | flexDirection: 'column', 19 | }, 20 | title: { 21 | marginBottom: '20px', 22 | }, 23 | settingsSection: { 24 | marginBottom: '20px', 25 | }, 26 | buttonsSection: { 27 | marginBottom: '40px', 28 | }, 29 | }); 30 | 31 | const App = ({classes}) => { 32 | const [transcribedData, setTranscribedData] = useState([]); 33 | const [interimTranscribedData, setInterimTranscribedData] = useState(''); 34 | const [isRecording, setIsRecording] = useState(false); 35 | const [selectedLanguage, setSelectedLanguage] = useState('en-US'); 36 | 37 | const supportedLanguages = {'en-US': 'English', 'de-DE': 'German', 'fr-FR': 'French', 'es-ES': 'Spanish'} 38 | 39 | function flushInterimData() { 40 | if (interimTranscribedData !== '') { 41 | setInterimTranscribedData('') 42 | setTranscribedData(oldData => [...oldData, interimTranscribedData]) 43 | } 44 | } 45 | 46 | function handleDataReceived(data, isFinal) { 47 | if (isFinal) { 48 | setInterimTranscribedData('') 49 | setTranscribedData(oldData => [...oldData, data]) 50 | } else { 51 | setInterimTranscribedData(data) 52 | } 53 | } 54 | 55 | function getTranscriptionConfig() { 56 | return { 57 | audio: { 58 | encoding: 'LINEAR16', 59 | sampleRateHertz: 16000, 60 | languageCode: selectedLanguage, 61 | }, 62 | interimResults: true 63 | } 64 | } 65 | 66 | function onStart() { 67 | setTranscribedData([]) 68 | setIsRecording(true) 69 | 70 | speechToTextUtils.initRecording( 71 | getTranscriptionConfig(), 72 | handleDataReceived, 73 | (error) => { 74 | console.error('Error when transcribing', error); 75 | setIsRecording(false) 76 | // No further action needed, as stream already closes itself on error 77 
| }); 78 | } 79 | 80 | function onStop() { 81 | setIsRecording(false) 82 | flushInterimData() // A safety net if Google's Speech API doesn't work as expected, i.e. always sends the final result 83 | speechToTextUtils.stopRecording(); 84 | } 85 | 86 | return ( 87 |
88 |
89 | 90 | Your Transcription App 🎤 91 | 92 |
93 |
94 | 96 |
97 |
98 | {!isRecording && } 99 | {isRecording && } 100 |
101 |
102 | 103 |
104 |
105 |     );
106 | }
107 |
108 | export default withStyles(useStyles)(App);

--------------------------------------------------------------------------------
/src/App.test.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { render } from '@testing-library/react';
3 | import App from './App';
4 |
5 | // The CRA boilerplate test looked for a "learn react" link this app never renders.
6 | test('renders the app title', () => {
7 |   const { getByText } = render(<App />);
8 |   const titleElement = getByText(/your transcription app/i);
9 |   expect(titleElement).toBeInTheDocument();
10 | });
11 |

--------------------------------------------------------------------------------
/src/SettingsSection.js:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import Select from '@material-ui/core/Select';
3 | import MenuItem from '@material-ui/core/MenuItem';
4 |
5 |
6 | const SettingsSections = ({possibleLanguages, selectedLanguage, onLanguageChanged}) => {
7 |     function onLangChangedLocal(event) {
8 |         onLanguageChanged(event.target.value)
9 |     }
10 |
11 |     return (
12 |         <Select value={selectedLanguage} onChange={onLangChangedLocal}>
13 |             {Object.keys(possibleLanguages).map(language =>
14 |                 <MenuItem key={language} value={language}>
15 |                     {possibleLanguages[language]}
16 |                 </MenuItem>
17 |             )}
18 |         </Select>
19 |     )
20 | }
21 |
22 | export default SettingsSections;
23 |

--------------------------------------------------------------------------------
/src/TranscribeOutput.js:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import withStyles from "@material-ui/core/styles/withStyles";
3 | import Typography from "@material-ui/core/Typography";
4 |
5 |
6 | const useStyles = () => ({
7 |     root: {
8 |         maxWidth: '800px',
9 |         display: 'flex'
10 |     },
11 |     outputText: {
12 |         marginLeft: '8px',
13 |         color: '#ef395a',
14 |     }
15 | });
16 |
17 | const TranscribeOutput = ({classes, transcribedText, interimTranscribedText}) => {
18 |     if (transcribedText.length === 0 && interimTranscribedText.length === 0) {
19 |         return <Typography variant="body1">...</Typography>;
20 |     }
21 |
22 |     return (
23 |         <div className={classes.root}>
24 |             <Typography variant="body1">{transcribedText}</Typography>
25 |             <Typography variant="body1" className={classes.outputText}>{interimTranscribedText}</Typography>
26 |         </div>
27 |     )
28 | }
29 |
30 | export default withStyles(useStyles)(TranscribeOutput);
31 |

--------------------------------------------------------------------------------
/src/index.css:
--------------------------------------------------------------------------------
1 | body {
2 |   margin: 0;
3 |   font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
4 |     'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
5 |     sans-serif;
6 |   -webkit-font-smoothing: antialiased;
7 |   -moz-osx-font-smoothing: grayscale;
8 | }
9 |
10 | code {
11 |   font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
12 |     monospace;
13 | }
14 |

--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom';
3 | import './index.css';
4 | import App from './App';
5 | import * as serviceWorker from './serviceWorker';
6 |
7 | ReactDOM.render(
8 |     <React.StrictMode>
9 |         <App/>
10 |     </React.StrictMode>,
11 |     document.getElementById('root')
12 | );
13 |
14 | // If you want your app to work offline and load faster, you can change
15 | // unregister() to register() below. Note this comes with some pitfalls.
16 | // Learn more about service workers: https://bit.ly/CRA-PWA
17 | serviceWorker.unregister();
18 |

--------------------------------------------------------------------------------
/src/serviceWorker.js:
--------------------------------------------------------------------------------
1 | // This optional code is used to register a service worker.
2 | // register() is not called by default.
3 |
4 | // This lets the app load faster on subsequent visits in production, and gives
5 | // it offline capabilities. However, it also means that developers (and users)
6 | // will only see deployed updates on subsequent visits to a page, after all the
7 | // existing tabs open on the page have been closed, since previously cached
8 | // resources are updated in the background.
9 |
10 | // To learn more about the benefits of this model and instructions on how to
11 | // opt-in, read https://bit.ly/CRA-PWA
12 |
13 | const isLocalhost = Boolean(
14 |   window.location.hostname === 'localhost' ||
15 |     // [::1] is the IPv6 localhost address.
16 |     window.location.hostname === '[::1]' ||
17 |     // 127.0.0.0/8 are considered localhost for IPv4.
18 |     window.location.hostname.match(
19 |       /^127(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}$/
20 |     )
21 | );
22 |
23 | export function register(config) {
24 |   if (process.env.NODE_ENV === 'production' && 'serviceWorker' in navigator) {
25 |     // The URL constructor is available in all browsers that support SW.
26 |     const publicUrl = new URL(process.env.PUBLIC_URL, window.location.href);
27 |     if (publicUrl.origin !== window.location.origin) {
28 |       // Our service worker won't work if PUBLIC_URL is on a different origin
29 |       // from what our page is served on. This might happen if a CDN is used to
30 |       // serve assets; see https://github.com/facebook/create-react-app/issues/2374
31 |       return;
32 |     }
33 |
34 |     window.addEventListener('load', () => {
35 |       const swUrl = `${process.env.PUBLIC_URL}/service-worker.js`;
36 |
37 |       if (isLocalhost) {
38 |         // This is running on localhost. Let's check if a service worker still exists or not.
39 |         checkValidServiceWorker(swUrl, config);
40 |
41 |         // Add some additional logging to localhost, pointing developers to the
42 |         // service worker/PWA documentation.
43 |         navigator.serviceWorker.ready.then(() => {
44 |           console.log(
45 |             'This web app is being served cache-first by a service ' +
46 |               'worker. To learn more, visit https://bit.ly/CRA-PWA'
47 |           );
48 |         });
49 |       } else {
50 |         // Is not localhost. Just register service worker
51 |         registerValidSW(swUrl, config);
52 |       }
53 |     });
54 |   }
55 | }
56 |
57 | function registerValidSW(swUrl, config) {
58 |   navigator.serviceWorker
59 |     .register(swUrl)
60 |     .then(registration => {
61 |       registration.onupdatefound = () => {
62 |         const installingWorker = registration.installing;
63 |         if (installingWorker == null) {
64 |           return;
65 |         }
66 |         installingWorker.onstatechange = () => {
67 |           if (installingWorker.state === 'installed') {
68 |             if (navigator.serviceWorker.controller) {
69 |               // At this point, the updated precached content has been fetched,
70 |               // but the previous service worker will still serve the older
71 |               // content until all client tabs are closed.
72 |               console.log(
73 |                 'New content is available and will be used when all ' +
74 |                   'tabs for this page are closed. See https://bit.ly/CRA-PWA.'
75 |               );
76 |
77 |               // Execute callback
78 |               if (config && config.onUpdate) {
79 |                 config.onUpdate(registration);
80 |               }
81 |             } else {
82 |               // At this point, everything has been precached.
83 |               // It's the perfect time to display a
84 |               // "Content is cached for offline use." message.
85 |               console.log('Content is cached for offline use.');
86 |
87 |               // Execute callback
88 |               if (config && config.onSuccess) {
89 |                 config.onSuccess(registration);
90 |               }
91 |             }
92 |           }
93 |         };
94 |       };
95 |     })
96 |     .catch(error => {
97 |       console.error('Error during service worker registration:', error);
98 |     });
99 | }
100 |
101 | function checkValidServiceWorker(swUrl, config) {
102 |   // Check if the service worker can be found. If it can't, reload the page.
103 |   fetch(swUrl, {
104 |     headers: { 'Service-Worker': 'script' },
105 |   })
106 |     .then(response => {
107 |       // Ensure service worker exists, and that we really are getting a JS file.
108 |       const contentType = response.headers.get('content-type');
109 |       if (
110 |         response.status === 404 ||
111 |         (contentType != null && contentType.indexOf('javascript') === -1)
112 |       ) {
113 |         // No service worker found. Probably a different app. Reload the page.
114 |         navigator.serviceWorker.ready.then(registration => {
115 |           registration.unregister().then(() => {
116 |             window.location.reload();
117 |           });
118 |         });
119 |       } else {
120 |         // Service worker found. Proceed as normal.
121 |         registerValidSW(swUrl, config);
122 |       }
123 |     })
124 |     .catch(() => {
125 |       console.log(
126 |         'No internet connection found. App is running in offline mode.'
127 |       );
128 |     });
129 | }
130 |
131 | export function unregister() {
132 |   if ('serviceWorker' in navigator) {
133 |     navigator.serviceWorker.ready
134 |       .then(registration => {
135 |         registration.unregister();
136 |       })
137 |       .catch(error => {
138 |         console.error(error.message);
139 |       });
140 |   }
141 | }
142 |

--------------------------------------------------------------------------------
/src/setupTests.js:
--------------------------------------------------------------------------------
1 | // jest-dom adds custom jest matchers for asserting on DOM nodes.
2 | // allows you to do things like:
3 | // expect(element).toHaveTextContent(/react/i)
4 | // learn more: https://github.com/testing-library/jest-dom
5 | import '@testing-library/jest-dom/extend-expect';

--------------------------------------------------------------------------------
/src/utility_transcribe.js:
--------------------------------------------------------------------------------
1 | import io from 'socket.io-client';
2 |
3 | const socket = io.connect("http://0.0.0.0:10000/", {transports: ['websocket']});
4 |
5 | // Stream Audio
6 | let bufferSize = 2048,
7 |     AudioContext,
8 |     context,
9 |     processor,
10 |     input,
11 |     globalStream;
12 |
13 | const mediaConstraints = {
14 |     audio: true,
15 |     video: false
16 | };
17 |
18 | let AudioStreamer = {
19 |     /**
20 |      * @param {object} transcribeConfig Transcription configuration such as language, encoding, etc.
21 |      * @param {function} onData Callback to run on data each time it's received
22 |      * @param {function} onError Callback to run on an error if one is emitted.
23 |      */
24 |     initRecording: function (transcribeConfig, onData, onError) {
25 |         socket.emit('startGoogleCloudStream', {...transcribeConfig});
26 |         AudioContext = window.AudioContext || window.webkitAudioContext;
27 |         context = new AudioContext();
28 |         processor = context.createScriptProcessor(bufferSize, 1, 1);
29 |         processor.connect(context.destination);
30 |         context.resume();
31 |
32 |         const handleSuccess = function (stream) {
33 |             globalStream = stream;
34 |             input = context.createMediaStreamSource(stream);
35 |             input.connect(processor);
36 |
37 |             processor.onaudioprocess = function (e) {
38 |                 microphoneProcess(e);
39 |             };
40 |         };
41 |
42 |         navigator.mediaDevices.getUserMedia(mediaConstraints)
43 |             .then(handleSuccess);
44 |
45 |         if (onData) {
46 |             socket.on('speechData', (response) => {
47 |                 onData(response.data, response.isFinal);
48 |             });
49 |         }
50 |
51 |         socket.on('googleCloudStreamError', (error) => {
52 |             if (onError) {
53 |                 onError('error');
54 |             }
55 |             closeAll();
56 |         });
57 |
58 |         socket.on('endGoogleCloudStream', () => {
59 |             closeAll();
60 |         });
61 |     },
62 |
63 |     stopRecording: function () {
64 |         socket.emit('endGoogleCloudStream');
65 |         closeAll();
66 |     }
67 | }
68 |
69 | export default AudioStreamer;
70 |
71 | // Helper functions
72 | /**
73 |  * Processes microphone data into a data stream
74 |  *
75 |  * @param {object} e Input from the microphone
76 |  */
77 | function microphoneProcess(e) {
78 |     const left = e.inputBuffer.getChannelData(0);
79 |     const left16 = convertFloat32ToInt16(left);
80 |     socket.emit('binaryAudioData', left16);
81 | }
82 |
83 | /**
84 |  * Converts a buffer from float32 to int16 and decimates it 3:1
85 |  * (assumes a 48 kHz AudioContext, yielding the 16 kHz sampleRateHertz
86 |  * the backend requests). Necessary for streaming.
87 |  *
88 |  * @param {object} buffer Buffer being converted
89 |  */
90 | function convertFloat32ToInt16(buffer) {
91 |     let l = buffer.length;
92 |     let buf = new Int16Array(l / 3);
93 |
94 |     while (l--) {
95 |         if (l % 3 === 0) {
96 |             buf[l / 3] = buffer[l] * 0x7FFF; // scale [-1, 1] floats to the int16 range (0xFFFF would overflow and wrap)
97 |         }
98 |     }
99 |     return buf.buffer
100 | }
101 |
102 | /**
103 |  * Stops recording and closes everything down. Runs on error or on stop.
104 |  */
105 | function closeAll() {
106 |     // Clear the listeners (prevents issue if opening and closing repeatedly)
107 |     socket.off('speechData');
108 |     socket.off('googleCloudStreamError');
109 |     let tracks = globalStream ? globalStream.getTracks() : null;
110 |     let track = tracks ? tracks[0] : null;
111 |     if (track) {
112 |         track.stop();
113 |     }
114 |
115 |     if (processor) {
116 |         if (input) {
117 |             try {
118 |                 input.disconnect(processor);
119 |             } catch (error) {
120 |                 console.warn('Attempt to disconnect input failed.')
121 |             }
122 |         }
123 |         processor.disconnect(context.destination);
124 |     }
125 |     if (context) {
126 |         context.close().then(function () {
127 |             input = null;
128 |             processor = null;
129 |             context = null;
130 |             AudioContext = null;
131 |         });
132 |     }
133 | }

--------------------------------------------------------------------------------
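
As a cross-check of the scaling and 3:1 decimation in `convertFloat32ToInt16` above, here is a minimal Python mirror of the same arithmetic — a sketch for sanity-checking, not production-quality resampling:

```python
# Python mirror of convertFloat32ToInt16(): scale float32 samples in [-1, 1]
# to the int16 range and keep every 3rd sample (naive 48 kHz -> 16 kHz decimation).
def convert_float32_to_int16(samples):
    return [int(s * 0x7FFF) for i, s in enumerate(samples) if i % 3 == 0]


print(convert_float32_to_int16([1.0, 0.0, 0.0, -0.5, 0.0, 0.0]))  # [32767, -16383]
```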