├── .github
│   └── FUNDING.yml
├── .gitignore
├── LICENSE
├── README.md
├── backend
│   ├── __init__.py
│   ├── demo_web_app.py
│   ├── google_speech_wrapper.py
│   ├── requirements.txt
│   └── settings.py
├── package-lock.json
├── package.json
├── public
│   ├── favicon.ico
│   ├── index.html
│   ├── manifest.json
│   └── robots.txt
└── src
    ├── App.css
    ├── App.js
    ├── App.test.js
    ├── SettingsSection.js
    ├── TranscribeOutput.js
    ├── index.css
    ├── index.js
    ├── serviceWorker.js
    ├── setupTests.js
    └── utility_transcribe.js
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: saharmor
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 | /.idea/
131 | node_modules
132 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Sahar
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Real-time Transcription Playground
4 |
5 |
6 | A real-time transcription project using React and a Python Socket.IO server. The goal of this project is to let developers create web demos and speech-to-text prototypes with just a few lines of code. Example use cases include medical dictation apps, a note-taking CRM for entrepreneurs, etc.
7 |
8 | *Currently only supports real-time transcription using Google Cloud Speech*
9 |
10 | # Demo
11 | https://user-images.githubusercontent.com/6180201/124362454-370e6600-dc35-11eb-8374-77da5aec25b2.mp4
12 |
13 |
14 | # Installation
15 | * Python 3 [instructions](https://realpython.com/installing-python/)
16 | * `yarn` [instructions](https://classic.yarnpkg.com/en/docs/install/#mac-stable)
17 |
18 | ## Google Speech API
19 | The code assumes an environment variable `GOOGLE_SERVICE_JSON_FILE` that points to a valid GCP service account file.
20 |
21 | If you need to get a service account:
22 | - Within your Google Cloud console, create or select a project
23 | - Enable the Cloud Speech API for that project
24 | - Create a service account
25 | - Download a private key as JSON
26 |
27 | More info in Google Cloud's docs [here](https://cloud.google.com/speech-to-text/docs/quickstart-client-libraries#before-you-begin) and [here](https://codelabs.developers.google.com/codelabs/cloud-speech-text-python3#0).
28 |
29 | Then, set the environment variable `GOOGLE_SERVICE_JSON_FILE` to the path of the JSON file containing your service account key, e.g. `/users/sahar/documents/sample-project-3c1a5892b00e.json`. Further details can be found in this [Medium article](https://medium.com/geekculture/how-to-build-a-full-stack-transcription-app-with-google-cloud-react-and-python-2dfdcb5e556f).
30 |
31 | # Setup
32 | 1. Clone or fork this repository
33 | 2. Create a virtual environment in the root directory: `python -m venv $ENV_NAME`
34 | 3. Activate the virtual environment: `source $ENV_NAME/bin/activate` (macOS, Unix, or Linux) or `.\$ENV_NAME\Scripts\activate` (Windows)
35 | 4. Install requirements: `pip install -r backend/requirements.txt`
36 | 5. Set your environment variable `GOOGLE_SERVICE_JSON_FILE` to point to your file path
37 | 6. Run `yarn install` in the root directory
38 | 7. Run `yarn start` to start the frontend and `yarn start-backend` to start the backend
39 |
--------------------------------------------------------------------------------
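A quick way to validate the `GOOGLE_SERVICE_JSON_FILE` setup described in the README is to load the credentials outside the app. The sketch below is not part of the repo; it only reuses the environment variable read by `backend/settings.py` and the same `from_service_account_json` call made in `backend/google_speech_wrapper.py`:

```python
# Hypothetical one-off sanity check, not a repo file.
import os

from google.cloud import speech

# backend/settings.py reads this exact variable (and raises KeyError if unset).
json_path = os.environ['GOOGLE_SERVICE_JSON_FILE']

# backend/google_speech_wrapper.py constructs its client the same way.
client = speech.SpeechClient.from_service_account_json(json_path)
print(f'Loaded service account credentials from {json_path}')
```

If this runs without raising, the credentials file is readable and well-formed; actual API access is only exercised once audio is streamed.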
/backend/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saharmor/realtime-transcription-playground/5cb73a10acc222599084296f593060e7548667cd/backend/__init__.py
--------------------------------------------------------------------------------
/backend/demo_web_app.py:
--------------------------------------------------------------------------------
1 | import socketio
2 | from aiohttp import web
3 |
4 | from backend.settings import BACKEND_PORT
5 | from backend.google_speech_wrapper import GoogleSpeechWrapper
6 |
7 | app = web.Application()
8 | # An explicit origin allow-list is safer than '*', which accepts any origin
9 | sio = socketio.AsyncServer(cors_allowed_origins=[])
10 |
11 | # Binds our Socket.IO server to our web app instance
12 | sio.attach(app)
13 |
14 |
15 | @sio.on('startGoogleCloudStream')
16 | async def start_google_stream(sid, config):
17 |     print(f'Starting streaming audio data from client {sid}')
18 |     await GoogleSpeechWrapper.start_recognition_stream(sio, sid, config)
19 |
20 |
21 | @sio.on('binaryAudioData')
22 | async def receive_binary_audio_data(sid, message):
23 |     GoogleSpeechWrapper.receive_data(sid, message)
24 |
25 |
26 | @sio.on('endGoogleCloudStream')
27 | async def close_google_stream(sid):
28 |     print(f'Closing streaming data from client {sid}')
29 |     await GoogleSpeechWrapper.stop_recognition_stream(sid)
30 |
31 |
32 | web.run_app(app, port=BACKEND_PORT)
33 |
--------------------------------------------------------------------------------
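`demo_web_app.py` defines the whole wire protocol: a client emits `startGoogleCloudStream` with a config, streams raw chunks via `binaryAudioData`, ends with `endGoogleCloudStream`, and receives `speechData` events back. A minimal sketch of a Python client exercising that protocol — assuming python-socketio's synchronous `Client` (with its websocket extra) is installed and a hypothetical 16 kHz, 16-bit, mono `sample.wav` exists; none of this is part of the repo:

```python
# Hypothetical test client, not a repo file.
import time
import wave

import socketio

sio = socketio.Client()


@sio.on('speechData')
def on_speech_data(payload):
    kind = 'final' if payload['isFinal'] else 'interim'
    print(f'[{kind}] {payload["data"]}')


sio.connect('http://localhost:10000', transports=['websocket'])

# Same config shape the React app builds in getTranscriptionConfig()
sio.emit('startGoogleCloudStream', {
    'audio': {'encoding': 'LINEAR16', 'sampleRateHertz': 16000, 'languageCode': 'en-US'},
    'interimResults': True,
})

with wave.open('sample.wav', 'rb') as wav:  # assumed 16 kHz, 16-bit, mono
    chunk = wav.readframes(2048)
    while chunk:
        sio.emit('binaryAudioData', chunk)
        time.sleep(0.1)  # rough real-time pacing
        chunk = wav.readframes(2048)

sio.emit('endGoogleCloudStream')
sio.disconnect()
```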
/backend/google_speech_wrapper.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import queue
3 | import sys
4 | import threading
5 | from typing import Dict
6 |
7 | from google.cloud import speech
8 |
9 | from backend.settings import GOOGLE_SERVICE_JSON_FILE
10 |
11 | clients = {}
12 |
13 |
14 | class ClientData:
15 |     def __init__(self, transcribe_thread, conn, config: Dict):
16 |         self._buff = queue.Queue()
17 |         self._thread = transcribe_thread
18 |         self._closed = True
19 |         self._conn = conn
20 |         self.general_config = {dict_key: config[dict_key] for dict_key in config if dict_key != 'audio'}
21 |         self.audio_config = config['audio']
22 |
23 |     async def close(self):
24 |         self._closed = True
25 |         self._buff.put(None)
26 |         self._thread.join()
27 |         await self._conn.emit('endGoogleCloudStream', '')
28 |
29 |     def start_transcribing(self):
30 |         self._closed = False
31 |         self._thread.start()
32 |
33 |     def add_data(self, data):
34 |         self._buff.put(data)
35 |
36 |     def generator(self):
37 |         """
38 |         Code taken and slightly modified from https://github.com/googleapis/python-speech/blob/master/samples/microphone/transcribe_streaming_infinite.py
39 |         """
40 |         while not self._closed:
41 |             # Use a blocking get() to ensure there's at least one chunk of
42 |             # data, and stop iteration if the chunk is None, indicating the
43 |             # end of the audio stream.
44 |             chunk = self._buff.get()
45 |             if chunk is None:
46 |                 return
47 |
48 |             data = [chunk]
49 |
50 |             # Now consume whatever other data's still buffered.
51 |             while True:
52 |                 try:
53 |                     chunk = self._buff.get(block=False)
54 |                     if chunk is None:
55 |                         return
56 |                     data.append(chunk)
57 |                 except queue.Empty:
58 |                     break
59 |
60 |             yield b"".join(data)
61 |
62 |     async def send_client_data(self, data, is_final: bool):
63 |         await self._conn.emit('speechData', {'data': data, 'isFinal': is_final})
64 |
65 |
66 | async def listen_print_loop(responses, client: ClientData):
67 |     """
68 |     Code taken and slightly modified from https://github.com/googleapis/python-speech/blob/master/samples/microphone/transcribe_streaming_infinite.py
69 |     Iterates through server responses and sends them back to client.
70 |
71 |     The responses passed is a generator that will block until a response
72 |     is provided by the server.
73 |
74 |     Each response may contain multiple results, and each result may contain
75 |     multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we
76 |     print only the transcription for the top alternative of the top result.
77 |
78 |     In this case, responses are provided for interim results as well. If the
79 |     response is an interim one, print a line feed at the end of it, to allow
80 |     the next result to overwrite it, until the response is a final one. For the
81 |     final one, print a newline to preserve the finalized transcription.
82 |     """
83 |     num_chars_printed = 0
84 |     interim_flush_counter = 0
85 |     for response in responses:
86 |         if not response.results:
87 |             continue
88 |
89 |         # The `results` list is consecutive. For streaming, we only care about
90 |         # the first result being considered, since once it's `is_final`, it
91 |         # moves on to considering the next utterance.
92 |         result = response.results[0]
93 |         if not result.alternatives:
94 |             continue
95 |
96 |         # Display the transcription of the top alternative.
97 |         transcript = result.alternatives[0].transcript
98 |
99 |         # Display interim results, but with a carriage return at the end of the
100 |         # line, so subsequent lines will overwrite them. If the previous result was
101 |         # longer than this one, we need to print some extra spaces to overwrite the previous result
102 |         overwrite_chars = " " * (num_chars_printed - len(transcript))
103 |
104 |         if not result.is_final:
105 |             sys.stdout.write(transcript + overwrite_chars + "\r")
106 |             sys.stdout.flush()
107 |             interim_flush_counter += 1
108 |
109 |             if client and interim_flush_counter % 3 == 0:
110 |                 interim_flush_counter = 0
111 |                 await client.send_client_data(transcript + overwrite_chars + "\r", False)
112 |
113 |             num_chars_printed = len(transcript)
114 |         else:
115 |             text = transcript + overwrite_chars
116 |             print(text)
117 |
118 |             if client:
119 |                 await client.send_client_data(text, True)
120 |
121 |             num_chars_printed = 0
122 |
123 |
124 | class GoogleSpeechWrapper:
125 |     encoding_map = {'LINEAR16': speech.RecognitionConfig.AudioEncoding.LINEAR16}
126 |
127 |     @staticmethod
128 |     async def start_listen(client_id: str):
129 |         client = clients[client_id]
130 |         speech_client = speech.SpeechClient.from_service_account_json(GOOGLE_SERVICE_JSON_FILE)
131 |         config = speech.RecognitionConfig(encoding=GoogleSpeechWrapper.encoding_map[client.audio_config['encoding']], sample_rate_hertz=client.audio_config['sampleRateHertz'],
132 |                                           language_code=client.audio_config['languageCode'], enable_automatic_punctuation=True)
133 |         streaming_config = speech.StreamingRecognitionConfig(config=config, interim_results=client.general_config['interimResults'])
134 |
135 |         audio_generator = client.generator()
136 |         requests = (speech.StreamingRecognizeRequest(audio_content=content) for content in audio_generator)
137 |         responses = speech_client.streaming_recognize(streaming_config, requests)
138 |         await listen_print_loop(responses, client)
139 |
140 |         # In case of ERROR
141 |         # client.emit('googleCloudStreamError', err);
142 |         # client._conn.emit('endGoogleCloudStream', '')
143 |
144 |     @staticmethod
145 |     async def start_recognition_stream(sio, client_id: str, config: Dict):
146 |         if client_id not in clients:
147 |             clients[client_id] = ClientData(threading.Thread(target=asyncio.run, args=(GoogleSpeechWrapper.start_listen(client_id),)), sio, config)
148 |             clients[client_id].start_transcribing()
149 |         else:
150 |             print('Warning - already running transcription for client')
151 |
152 |     @staticmethod
153 |     async def stop_recognition_stream(client_id: str):
154 |         if client_id in clients:
155 |             await clients[client_id].close()
156 |             del clients[client_id]
157 |
158 |     @staticmethod
159 |     def receive_data(client_id: str, data):
160 |         if client_id not in clients:
161 |             return
162 |
163 |         clients[client_id].add_data(data)
164 |
--------------------------------------------------------------------------------
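The wrapper's central design choice is how it bridges the async Socket.IO server to Google's blocking streaming API: `start_recognition_stream` gives each client a dedicated thread running its own event loop (`threading.Thread(target=asyncio.run, ...)`), and a thread-safe `queue.Queue` carries audio from the server into that thread's `generator()`, with `None` as the end-of-stream sentinel. A stripped-down, standalone sketch of the same pattern (illustrative only, not part of the repo):

```python
# Minimal illustration of the thread-per-client queue bridge.
import asyncio
import queue
import threading

buff = queue.Queue()


async def consume():
    while True:
        chunk = buff.get()  # blocking get, as in ClientData.generator
        if chunk is None:   # None is the end-of-stream sentinel
            print('stream closed')
            return
        print(f'received {len(chunk)} bytes')


# Each client gets its own worker thread with its own event loop
worker = threading.Thread(target=asyncio.run, args=(consume(),))
worker.start()

buff.put(b'\x00' * 2048)  # chunks arrive from the Socket.IO handlers
buff.put(None)            # ClientData.close() pushes the sentinel
worker.join()
```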
/backend/requirements.txt:
--------------------------------------------------------------------------------
1 | python-socketio==5.3.0
2 | aiohttp==3.7.4.post0
3 | google-cloud-speech==2.5.0
--------------------------------------------------------------------------------
/backend/settings.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | GOOGLE_SERVICE_JSON_FILE = os.environ['GOOGLE_SERVICE_JSON_FILE']
4 | BACKEND_PORT = 10000
5 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "realtime-transcribe-playground",
3 | "version": "0.1.0",
4 | "private": true,
5 | "dependencies": {
6 | "@material-ui/core": "^4.11.4",
7 | "@testing-library/jest-dom": "^4.2.4",
8 | "@testing-library/react": "^9.3.2",
9 | "@testing-library/user-event": "^7.1.2",
10 | "axios": "^0.21.1",
11 | "bootstrap": "^4.5.0",
12 | "lodash": "^4.17.19",
13 | "react": "^16.13.1",
14 | "react-bootstrap": "^1.2.2",
15 | "react-dom": "^16.13.1",
16 | "react-scripts": "^3.4.1",
17 | "react-spinners": "^0.10.6",
18 | "socket.io-client": "^4.1.2"
19 | },
20 | "scripts": {
21 | "start": "react-scripts start",
22 | "start-backend": "cd backend && source venv/bin/activate && python demo_web_app.py",
23 | "build": "react-scripts build",
24 | "test": "react-scripts test",
25 | "eject": "react-scripts eject"
26 | },
27 | "eslintConfig": {
28 | "extends": "react-app"
29 | },
30 | "browserslist": {
31 | "production": [
32 | ">0.2%",
33 | "not dead",
34 | "not op_mini all"
35 | ],
36 | "development": [
37 | "last 1 chrome version",
38 | "last 1 firefox version",
39 | "last 1 safari version"
40 | ]
41 | },
42 | "proxy": "http://localhost:5000"
43 | }
44 |
--------------------------------------------------------------------------------
/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saharmor/realtime-transcription-playground/5cb73a10acc222599084296f593060e7548667cd/public/favicon.ico
--------------------------------------------------------------------------------
/public/index.html:
--------------------------------------------------------------------------------
1 | <!-- Markup stripped in extraction: a standard Create React App index.html shell; only the page title survives. -->
21 | <title>Realtime Transcription Playground</title>
--------------------------------------------------------------------------------
/public/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "short_name": "Transcription Playground",
3 | "name": "Realtime Transcription Playground",
4 | "icons": [],
5 | "start_url": ".",
6 | "display": "standalone",
7 | "theme_color": "#000000",
8 | "background_color": "#ffffff"
9 | }
10 |
--------------------------------------------------------------------------------
/public/robots.txt:
--------------------------------------------------------------------------------
1 | # https://www.robotstxt.org/robotstxt.html
2 | User-agent: *
3 | Disallow:
4 |
--------------------------------------------------------------------------------
/src/App.css:
--------------------------------------------------------------------------------
1 | html {
2 | height: 100%;
3 | }
4 |
5 | body {
6 | min-height: 100%;
7 | }
8 |
9 | #root {
10 | height: 100vh;
11 | background-color: #f6f6ef;
12 | display: flex;
13 | flex-direction: row;
14 | }
15 |
16 |
--------------------------------------------------------------------------------
/src/App.js:
--------------------------------------------------------------------------------
1 | import React, {useState} from "react";
2 | import {Button} from "react-bootstrap";
3 | import withStyles from "@material-ui/core/styles/withStyles";
4 | import Typography from "@material-ui/core/Typography";
5 | import "bootstrap/dist/css/bootstrap.min.css";
6 | import "./App.css";
7 | import speechToTextUtils from "./utility_transcribe";
8 | import TranscribeOutput from "./TranscribeOutput";
9 | import SettingsSections from "./SettingsSection";
10 |
11 | const useStyles = () => ({
12 | root: {
13 | display: 'flex',
14 | flex: '1',
15 | margin: '100px 0px 100px 0px',
16 | alignItems: 'center',
17 | textAlign: 'center',
18 | flexDirection: 'column',
19 | },
20 | title: {
21 | marginBottom: '20px',
22 | },
23 | settingsSection: {
24 | marginBottom: '20px',
25 | },
26 | buttonsSection: {
27 | marginBottom: '40px',
28 | },
29 | });
30 |
31 | const App = ({classes}) => {
32 | const [transcribedData, setTranscribedData] = useState([]);
33 | const [interimTranscribedData, setInterimTranscribedData] = useState('');
34 | const [isRecording, setIsRecording] = useState(false);
35 | const [selectedLanguage, setSelectedLanguage] = useState('en-US');
36 |
37 | const supportedLanguages = {'en-US': 'English', 'de-DE': 'German', 'fr-FR': 'French', 'es-ES': 'Spanish'}
38 |
39 | function flushInterimData() {
40 | if (interimTranscribedData !== '') {
41 | setInterimTranscribedData('')
42 | setTranscribedData(oldData => [...oldData, interimTranscribedData])
43 | }
44 | }
45 |
46 | function handleDataReceived(data, isFinal) {
47 | if (isFinal) {
48 | setInterimTranscribedData('')
49 | setTranscribedData(oldData => [...oldData, data])
50 | } else {
51 | setInterimTranscribedData(data)
52 | }
53 | }
54 |
55 | function getTranscriptionConfig() {
56 | return {
57 | audio: {
58 | encoding: 'LINEAR16',
59 | sampleRateHertz: 16000,
60 | languageCode: selectedLanguage,
61 | },
62 | interimResults: true
63 | }
64 | }
65 |
66 | function onStart() {
67 | setTranscribedData([])
68 | setIsRecording(true)
69 |
70 | speechToTextUtils.initRecording(
71 | getTranscriptionConfig(),
72 | handleDataReceived,
73 | (error) => {
74 | console.error('Error when transcribing', error);
75 | setIsRecording(false)
76 | // No further action needed, as stream already closes itself on error
77 | });
78 | }
79 |
80 | function onStop() {
81 | setIsRecording(false)
82 | flushInterimData() // A safety net if Google's Speech API doesn't work as expected, i.e. always sends the final result
83 | speechToTextUtils.stopRecording();
84 | }
85 |
86 |   return (
87 |     <div className={classes.root}>
88 |       <div className={classes.title}>
89 |         <Typography variant="h4">
90 |           Your Transcription App 🎤
91 |         </Typography>
92 |       </div>
93 |       <div className={classes.settingsSection}>
94 |         <SettingsSections possibleLanguages={supportedLanguages} selectedLanguage={selectedLanguage}
95 |                           onLanguageChanged={setSelectedLanguage}/>
96 |       </div>
97 |       <div className={classes.buttonsSection}>
98 |         {!isRecording && <Button variant="primary" onClick={onStart}>Start transcribing</Button>}
99 |         {isRecording && <Button variant="danger" onClick={onStop}>Stop transcribing</Button>}
100 |       </div>
101 |       <TranscribeOutput transcribedText={transcribedData} interimTranscribedText={interimTranscribedData}/>
102 |     </div>
103 |
104 |
105 |   );
106 | }
107 |
108 | export default withStyles(useStyles)(App);
109 |
--------------------------------------------------------------------------------
/src/App.test.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { render } from '@testing-library/react';
3 | import App from './App';
4 |
5 | test('renders learn react link', () => {
6 | const { getByText } = render(<App />);
7 | const linkElement = getByText(/learn react/i);
8 | expect(linkElement).toBeInTheDocument();
9 | });
10 |
--------------------------------------------------------------------------------
/src/SettingsSection.js:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import Select from '@material-ui/core/Select';
3 | import MenuItem from '@material-ui/core/MenuItem';
4 |
5 |
6 | const SettingsSections = ({possibleLanguages, selectedLanguage, onLanguageChanged}) => {
7 | function onLangChangedLocal(event) {
8 | onLanguageChanged(event.target.value)
9 | }
10 |
11 |   return (
12 |     <div>
13 |       <Select value={selectedLanguage} onChange={onLangChangedLocal}>
14 |         {Object.entries(possibleLanguages).map(([langCode, langName]) =>
15 |           <MenuItem value={langCode} key={langCode}>{langName}</MenuItem>
16 |         )}
17 |       </Select>
18 |     </div>
19 |   )
20 | }
21 |
22 | export default SettingsSections;
23 |
--------------------------------------------------------------------------------
/src/TranscribeOutput.js:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import withStyles from "@material-ui/core/styles/withStyles";
3 | import Typography from "@material-ui/core/Typography";
4 |
5 |
6 | const useStyles = () => ({
7 | root: {
8 | maxWidth: '800px',
9 | display: 'flex'
10 | },
11 | outputText: {
12 | marginLeft: '8px',
13 | color: '#ef395a',
14 | }
15 | });
16 |
17 | const TranscribeOutput = ({classes, transcribedText, interimTranscribedText}) => {
18 | if (transcribedText.length === 0 && interimTranscribedText.length === 0) {
19 |     return <Typography variant="body1">...</Typography>;
20 |   }
21 |
22 |   return (
23 |     <div className={classes.root}>
24 |       <Typography variant="body1">{transcribedText}</Typography>
25 |       <Typography variant="body1" className={classes.outputText}>{interimTranscribedText}</Typography>
26 |     </div>
27 |   )
28 | }
29 |
30 | export default withStyles(useStyles)(TranscribeOutput);
31 |
--------------------------------------------------------------------------------
/src/index.css:
--------------------------------------------------------------------------------
1 | body {
2 | margin: 0;
3 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
4 | 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
5 | sans-serif;
6 | -webkit-font-smoothing: antialiased;
7 | -moz-osx-font-smoothing: grayscale;
8 | }
9 |
10 | code {
11 | font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
12 | monospace;
13 | }
14 |
--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom';
3 | import './index.css';
4 | import App from './App';
5 | import * as serviceWorker from './serviceWorker';
6 |
7 | ReactDOM.render(
8 |   <React.StrictMode>
9 |     <App />
10 |   </React.StrictMode>,
11 |   document.getElementById('root')
12 | );
13 |
14 | // If you want your app to work offline and load faster, you can change
15 | // unregister() to register() below. Note this comes with some pitfalls.
16 | // Learn more about service workers: https://bit.ly/CRA-PWA
17 | serviceWorker.unregister();
18 |
--------------------------------------------------------------------------------
/src/serviceWorker.js:
--------------------------------------------------------------------------------
1 | // This optional code is used to register a service worker.
2 | // register() is not called by default.
3 |
4 | // This lets the app load faster on subsequent visits in production, and gives
5 | // it offline capabilities. However, it also means that developers (and users)
6 | // will only see deployed updates on subsequent visits to a page, after all the
7 | // existing tabs open on the page have been closed, since previously cached
8 | // resources are updated in the background.
9 |
10 | // To learn more about the benefits of this model and instructions on how to
11 | // opt-in, read https://bit.ly/CRA-PWA
12 |
13 | const isLocalhost = Boolean(
14 | window.location.hostname === 'localhost' ||
15 | // [::1] is the IPv6 localhost address.
16 | window.location.hostname === '[::1]' ||
17 | // 127.0.0.0/8 are considered localhost for IPv4.
18 | window.location.hostname.match(
19 | /^127(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}$/
20 | )
21 | );
22 |
23 | export function register(config) {
24 | if (process.env.NODE_ENV === 'production' && 'serviceWorker' in navigator) {
25 | // The URL constructor is available in all browsers that support SW.
26 | const publicUrl = new URL(process.env.PUBLIC_URL, window.location.href);
27 | if (publicUrl.origin !== window.location.origin) {
28 | // Our service worker won't work if PUBLIC_URL is on a different origin
29 | // from what our page is served on. This might happen if a CDN is used to
30 | // serve assets; see https://github.com/facebook/create-react-app/issues/2374
31 | return;
32 | }
33 |
34 | window.addEventListener('load', () => {
35 | const swUrl = `${process.env.PUBLIC_URL}/service-worker.js`;
36 |
37 | if (isLocalhost) {
38 | // This is running on localhost. Let's check if a service worker still exists or not.
39 | checkValidServiceWorker(swUrl, config);
40 |
41 | // Add some additional logging to localhost, pointing developers to the
42 | // service worker/PWA documentation.
43 | navigator.serviceWorker.ready.then(() => {
44 | console.log(
45 | 'This web app is being served cache-first by a service ' +
46 | 'worker. To learn more, visit https://bit.ly/CRA-PWA'
47 | );
48 | });
49 | } else {
50 | // Is not localhost. Just register service worker
51 | registerValidSW(swUrl, config);
52 | }
53 | });
54 | }
55 | }
56 |
57 | function registerValidSW(swUrl, config) {
58 | navigator.serviceWorker
59 | .register(swUrl)
60 | .then(registration => {
61 | registration.onupdatefound = () => {
62 | const installingWorker = registration.installing;
63 | if (installingWorker == null) {
64 | return;
65 | }
66 | installingWorker.onstatechange = () => {
67 | if (installingWorker.state === 'installed') {
68 | if (navigator.serviceWorker.controller) {
69 | // At this point, the updated precached content has been fetched,
70 | // but the previous service worker will still serve the older
71 | // content until all client tabs are closed.
72 | console.log(
73 | 'New content is available and will be used when all ' +
74 | 'tabs for this page are closed. See https://bit.ly/CRA-PWA.'
75 | );
76 |
77 | // Execute callback
78 | if (config && config.onUpdate) {
79 | config.onUpdate(registration);
80 | }
81 | } else {
82 | // At this point, everything has been precached.
83 | // It's the perfect time to display a
84 | // "Content is cached for offline use." message.
85 | console.log('Content is cached for offline use.');
86 |
87 | // Execute callback
88 | if (config && config.onSuccess) {
89 | config.onSuccess(registration);
90 | }
91 | }
92 | }
93 | };
94 | };
95 | })
96 | .catch(error => {
97 | console.error('Error during service worker registration:', error);
98 | });
99 | }
100 |
101 | function checkValidServiceWorker(swUrl, config) {
102 | // Check if the service worker can be found. If it can't, reload the page.
103 | fetch(swUrl, {
104 | headers: { 'Service-Worker': 'script' },
105 | })
106 | .then(response => {
107 | // Ensure service worker exists, and that we really are getting a JS file.
108 | const contentType = response.headers.get('content-type');
109 | if (
110 | response.status === 404 ||
111 | (contentType != null && contentType.indexOf('javascript') === -1)
112 | ) {
113 | // No service worker found. Probably a different app. Reload the page.
114 | navigator.serviceWorker.ready.then(registration => {
115 | registration.unregister().then(() => {
116 | window.location.reload();
117 | });
118 | });
119 | } else {
120 | // Service worker found. Proceed as normal.
121 | registerValidSW(swUrl, config);
122 | }
123 | })
124 | .catch(() => {
125 | console.log(
126 | 'No internet connection found. App is running in offline mode.'
127 | );
128 | });
129 | }
130 |
131 | export function unregister() {
132 | if ('serviceWorker' in navigator) {
133 | navigator.serviceWorker.ready
134 | .then(registration => {
135 | registration.unregister();
136 | })
137 | .catch(error => {
138 | console.error(error.message);
139 | });
140 | }
141 | }
142 |
--------------------------------------------------------------------------------
/src/setupTests.js:
--------------------------------------------------------------------------------
1 | // jest-dom adds custom jest matchers for asserting on DOM nodes.
2 | // allows you to do things like:
3 | // expect(element).toHaveTextContent(/react/i)
4 | // learn more: https://github.com/testing-library/jest-dom
5 | import '@testing-library/jest-dom/extend-expect';
6 |
--------------------------------------------------------------------------------
/src/utility_transcribe.js:
--------------------------------------------------------------------------------
1 | import io from 'socket.io-client';
2 |
3 | const socket = io("http://localhost:10000/", {transports: ['websocket']});  // BACKEND_PORT in backend/settings.py
4 |
5 | // Stream Audio
6 | let bufferSize = 2048,
7 | AudioContext,
8 | context,
9 | processor,
10 | input,
11 | globalStream;
12 |
13 | const mediaConstraints = {
14 | audio: true,
15 | video: false
16 | };
17 |
18 | let AudioStreamer = {
19 | /**
20 | * @param {object} transcribeConfig Transcription configuration such as language, encoding, etc.
21 | * @param {function} onData Callback to run on data each time it's received
22 | * @param {function} onError Callback to run on an error if one is emitted.
23 | */
24 | initRecording: function (transcribeConfig, onData, onError) {
25 | socket.emit('startGoogleCloudStream', {...transcribeConfig});
26 | AudioContext = window.AudioContext || window.webkitAudioContext;
27 | context = new AudioContext();
28 | processor = context.createScriptProcessor(bufferSize, 1, 1);
29 | processor.connect(context.destination);
30 | context.resume();
31 |
32 | const handleSuccess = function (stream) {
33 | globalStream = stream;
34 | input = context.createMediaStreamSource(stream);
35 | input.connect(processor);
36 |
37 | processor.onaudioprocess = function (e) {
38 | microphoneProcess(e);
39 | };
40 | };
41 |
42 | navigator.mediaDevices.getUserMedia(mediaConstraints)
43 | .then(handleSuccess);
44 |
45 | if (onData) {
46 | socket.on('speechData', (response) => {
47 | onData(response.data, response.isFinal);
48 | });
49 | }
50 |
51 | socket.on('googleCloudStreamError', (error) => {
52 | if (onError) {
53 | onError(error);
54 | }
55 | closeAll();
56 | });
57 |
58 | socket.on('endGoogleCloudStream', () => {
59 | closeAll();
60 | });
61 | },
62 |
63 | stopRecording: function () {
64 | socket.emit('endGoogleCloudStream');
65 | closeAll();
66 | }
67 | }
68 |
69 | export default AudioStreamer;
70 |
71 | // Helper functions
72 | /**
73 | * Processes microphone data into a data stream
74 | *
75 | * @param {object} e Input from the microphone
76 | */
77 | function microphoneProcess(e) {
78 | const left = e.inputBuffer.getChannelData(0);
79 | const left16 = convertFloat32ToInt16(left);
80 | socket.emit('binaryAudioData', left16);
81 | }
82 |
83 | /**
84 |  * Converts a buffer from float32 to int16 and downsamples it by a factor of 3,
85 |  * e.g. a 48000 Hz AudioContext stream becomes the 16000 Hz audio the backend expects.
86 |  *
87 |  * @param {object} buffer Buffer being converted
88 |  */
89 | function convertFloat32ToInt16(buffer) {
90 |   let l = buffer.length;
91 |   let buf = new Int16Array(l / 3);
92 |
93 |   while (l--) {
94 |     if (l % 3 === 0) {
95 |       buf[l / 3] = buffer[l] * 0x7FFF; // scale [-1, 1] floats into int16 range (0xFFFF would overflow)
96 |     }
97 |   }
98 |   return buf.buffer
99 | }
100 |
101 | /**
102 | * Stops recording and closes everything down. Runs on error or on stop.
103 | */
104 | function closeAll() {
105 | // Clear the listeners (prevents issue if opening and closing repeatedly)
106 | socket.off('speechData');
107 | socket.off('googleCloudStreamError');
108 | let tracks = globalStream ? globalStream.getTracks() : null;
109 | let track = tracks ? tracks[0] : null;
110 | if (track) {
111 | track.stop();
112 | }
113 |
114 | if (processor) {
115 | if (input) {
116 | try {
117 | input.disconnect(processor);
118 | } catch (error) {
119 | console.warn('Attempt to disconnect input failed.')
120 | }
121 | }
122 | processor.disconnect(context.destination);
123 | }
124 | if (context) {
125 | context.close().then(function () {
126 | input = null;
127 | processor = null;
128 | context = null;
129 | AudioContext = null;
130 | });
131 | }
132 | }
--------------------------------------------------------------------------------
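`convertFloat32ToInt16` above is what ties the two halves together: the browser's `AudioContext` typically runs at 48000 Hz, and keeping every third sample produces the 16000 Hz LINEAR16 stream declared in the backend's `RecognitionConfig` (if the context runs at a different rate, the transcription audio will be pitch-shifted). A small Python re-implementation of the same conversion, for illustration only (numpy is not a repo dependency):

```python
# Illustrative Python equivalent of the frontend's convertFloat32ToInt16.
import numpy as np


def convert_float32_to_int16(buffer: np.ndarray) -> bytes:
    downsampled = buffer[::3]  # keep every 3rd sample: 48000 Hz -> 16000 Hz
    return (downsampled * 0x7FFF).astype(np.int16).tobytes()  # [-1, 1] -> int16


# A 2048-sample ScriptProcessor buffer at 48 kHz becomes 683 samples,
# i.e. roughly 42.7 ms of 16 kHz audio per 'binaryAudioData' message.
chunk = np.zeros(2048, dtype=np.float32)
print(len(convert_float32_to_int16(chunk)) // 2)  # -> 683
```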