├── db.sqlite3
├── SpeechServer
    ├── __init__.py
    ├── wsgi.py
    ├── urls.py
    └── settings.py
├── speech_server_main
    ├── __init__.py
    ├── config
    │   ├── __init__.py
    │   ├── config.json
    │   └── config.py
    ├── deepspeech
    │   ├── __init__.py
    │   └── deepspeech.py
    ├── migrations
    │   └── __init__.py
    ├── models.py
    ├── admin.py
    ├── tests.py
    ├── urls.py
    ├── static
    │   ├── css
    │   │   ├── theme.css
    │   │   └── main.css
    │   └── speech_server_main
    │   │   ├── wav encoder license
    │   │       └── LICENSE.txt
    │   │   ├── audioRecorderWorker.js
    │   │   ├── audioRecorder.js
    │   │   ├── WavAudioEncoder.js
    │   │   ├── script.js
    │   │   └── resampler.js
    ├── routing.py
    ├── logging.py
    ├── views.py
    ├── consumers.py
    ├── apps.py
    └── templates
    │   └── speech_server_main
    │       ├── index.html
    │       └── base.html
├── requirements.txt
├── .project
├── manage.py
├── .pydevproject
├── LICENSE
├── .gitignore
└── README.md


/db.sqlite3:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/SpeechServer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/speech_server_main/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/speech_server_main/config/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/speech_server_main/deepspeech/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/speech_server_main/migrations/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/speech_server_main/models.py:
--------------------------------------------------------------------------------
1 | from django.db import models
2 | 
3 | 
4 | # Create your models here.
5 | 


--------------------------------------------------------------------------------
/speech_server_main/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 | 
3 | 
4 | # Register your models here.
5 | 


--------------------------------------------------------------------------------
/speech_server_main/tests.py:
--------------------------------------------------------------------------------
1 | from django.test import TestCase
2 | 
3 | 
4 | # Create your tests here.
5 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | scipy>=1.1.0
 2 | appdirs>=1.4.3
 3 | Django==2.1.2
 4 | django-cors-headers==2.1.0
 5 | django-sslserver==0.20
 6 | django-bootstrap3>=9.1.0
 7 | django-filter>=1.0.2
 8 | djangorestframework>=3.6.2
 9 | channels==1.1.8
10 | deepspeech==0.4.0
11 | 


--------------------------------------------------------------------------------
/speech_server_main/urls.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Created on 01-Jan-2018
 3 | 
 4 | @author: ashwani
 5 | '''
 6 | from django.urls.conf import path
 7 | from . import views
 8 | 
 9 | # Application namespace for these routes (reversed as 'swp:<name>').
10 | app_name = 'swp'
11 | # Paths are relative to wherever the project URLconf includes this module.
12 | urlpatterns = [
13 |     # Landing page rendered by views.index.
14 |     path('', views.index, name='index'),
15 |     # Endpoint whose request body is the audio handled by views.handle_audio.
16 |     path('handleaudio/', views.handle_audio, name='handleaudio')
17 | ]


--------------------------------------------------------------------------------
/speech_server_main/static/css/theme.css:
--------------------------------------------------------------------------------
 1 | /* Page padding; the top value presumably clears the fixed-top navbar (TODO confirm). */
 2 | body {
 3 |   padding-top: 70px;
 4 |   padding-bottom: 30px;
 5 | }
 6 | 
 7 | /* Dropdown menus inside .theme-dropdown stay in normal flow and are always displayed. */
 8 | .theme-dropdown .dropdown-menu {
 9 |   position: static;
10 |   display: block;
11 |   margin-bottom: 20px;
12 | }
13 | 
14 | /* Vertical spacing for buttons that are direct children of showcase paragraphs. */
15 | .theme-showcase > p > .btn {
16 |   margin: 5px 0;
17 | }
18 | 
19 | /* Navbar containers inside the showcase use automatic width. */
20 | .theme-showcase .navbar .container {
21 |   width: auto;
22 | }
19 | 


--------------------------------------------------------------------------------
/speech_server_main/routing.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Channels routing configuration.
 3 | '''
 4 | 
 5 | from channels.routing import route
 6 | from speech_server_main.consumers import ws_connect, ws_message, ws_disconnect
 7 | # Maps each websocket channel name to the consumer function that handles it.
 8 | channel_routing = [
 9 |     route("websocket.connect", ws_connect),
10 |     route("websocket.receive", ws_message),
11 |     route("websocket.disconnect", ws_disconnect),
12 | ]
12 | 


--------------------------------------------------------------------------------
/speech_server_main/config/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "deepspeech": {
 3 |     "model" :"/media/ashwanip/New Volume/DeepSpeech/models/output_graph.pb",
 4 |     "lm": "/media/ashwanip/New Volume/DeepSpeech/models/lm.binary",
 5 |     "trie": "/media/ashwanip/New Volume/DeepSpeech/models/trie",
 6 |     "audiofiledir": "/media/ashwanip/New Volume/DeepSpeech/audio/",
 7 |     "audiofilelength": "10",
 8 |     "debug": "1"
 9 |   }
10 | }
11 | 


--------------------------------------------------------------------------------
/speech_server_main/static/css/main.css:
--------------------------------------------------------------------------------
 1 | /* Tables with this class use automatic width; !important overrides other width rules. */
 2 | .table-nonfluid {
 3 |    width: auto !important;
 4 | }
 5 | 
 6 | /* Fixed width for the header cells of such tables. */
 7 | .table-nonfluid>thead>tr>th {
 8 |    width: 220px;
 9 | }
10 | 
11 | /* Both panels start hidden; presumably script code reveals them when needed (TODO confirm). */
12 | #error-panel, #progress-panel  {
13 |    display: none;
14 | }
15 | 
16 | /* Disable the width transition on active progress bars. */
17 | .progress.active .progress-bar {
18 |     -webkit-transition: none !important;
19 |     transition: none !important;
20 | }
21 | 
22 | /* Small gap above the result area and the main form. */
23 | .result_container, .form-main {
24 |     margin-top: 10px;
25 | }
21 | 


--------------------------------------------------------------------------------
/SpeechServer/wsgi.py:
--------------------------------------------------------------------------------
 1 | """
 2 | WSGI config for SpeechServer project.
 3 | 
 4 | It exposes the WSGI callable as a module-level variable named ``application``.
 5 | 
 6 | For more information on this file, see
 7 | https://docs.djangoproject.com/en/2.0/howto/deployment/wsgi/
 8 | """
 9 | 
10 | import os
11 | 
12 | from django.core.wsgi import get_wsgi_application
13 | 
14 | # Use the project settings unless DJANGO_SETTINGS_MODULE is already set in the environment.
15 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "SpeechServer.settings")
16 | 
17 | # Module-level WSGI callable, as described in the module docstring.
18 | application = get_wsgi_application()
17 | 


--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>SpeechServer</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.python.pydev.PyDevBuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 	</buildSpec>
14 | 	<natures>
15 | 		<nature>org.python.pydev.pythonNature</nature>
16 | 		<nature>org.python.pydev.django.djangoNature</nature>
17 | 	</natures>
18 | </projectDescription>
19 | 


--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os
 3 | import sys
 4 | 
 5 | if __name__ == "__main__":
 6 |     # Use the project settings unless DJANGO_SETTINGS_MODULE is already set.
 7 |     os.environ.setdefault("DJANGO_SETTINGS_MODULE", "SpeechServer.settings")
 8 |     try:
 9 |         from django.core.management import execute_from_command_line
10 |     except ImportError as exc:
11 |         # Re-raise with a hint about the usual causes; the original error stays chained.
12 |         raise ImportError(
13 |             "Couldn't import Django. Are you sure it's installed and "
14 |             "available on your PYTHONPATH environment variable? Did you "
15 |             "forget to activate a virtual environment?"
16 |         ) from exc
17 |     # Hand the command-line arguments to Django's management runner.
18 |     execute_from_command_line(sys.argv)
16 | 


--------------------------------------------------------------------------------
/.pydevproject:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 2 | <?eclipse-pydev version="1.0"?><pydev_project>
 3 | <pydev_variables_property name="org.python.pydev.PROJECT_VARIABLE_SUBSTITUTION">
 4 | <key>DJANGO_MANAGE_LOCATION</key>
 5 | <value>manage.py</value>
 6 | </pydev_variables_property>
 7 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
 8 | <path>/${PROJECT_DIR_NAME}</path>
 9 | </pydev_pathproperty>
10 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python interpreter</pydev_property>
11 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
12 | </pydev_project>
13 | 


--------------------------------------------------------------------------------
/speech_server_main/logging.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from os import path
 3 | import logging
 4 | from speech_server_main.config import config
 5 | 
 6 | if not path.exists("./logs"):
 7 |     os.mkdir("./logs")
 8 |     
 9 | logging.basicConfig(filename="./logs/deepspeech_server.log", format='%(levelname)s %(asctime)s %(message)s')
10 | def log(message, log_level="warning"):
11 |     
12 |     set_debug = config.ConfigDeepSpeech().get_config("debug")
13 |     if set_debug == "1":
14 |         logging.getLogger(None).setLevel(logging.DEBUG)
15 |     if log_level == "debug":
16 |         logging.debug(message)
17 |     elif log_level == "info":
18 |         logging.info(message)
19 |     elif log_level == "error":
20 |         logging.error(message)
21 |     else:
22 |         logging.warning(message)


--------------------------------------------------------------------------------
/SpeechServer/urls.py:
--------------------------------------------------------------------------------
 1 | """SpeechServer URL Configuration
 2 | 
 3 | The `urlpatterns` list routes URLs to views. For more information please see:
 4 |     https://docs.djangoproject.com/en/2.0/topics/http/urls/
 5 | Examples:
 6 | Function views
 7 |     1. Add an import:  from my_app import views
 8 |     2. Add a URL to urlpatterns:  path('', views.home, name='home')
 9 | Class-based views
10 |     1. Add an import:  from other_app.views import Home
11 |     2. Add a URL to urlpatterns:  path('', Home.as_view(), name='home')
12 | Including another URLconf
13 |     1. Import the include() function: from django.urls import include, path
14 |     2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
15 | """
16 | from django.conf.urls import include, url
17 | from django.contrib import admin
18 | from django.views.generic import RedirectView
19 | 
20 | urlpatterns = [
21 |     # Redirect the bare site root to the speech server app.
22 |     url(r'^$', RedirectView.as_view(url='/dsserver')),
23 |     url(r'^admin/', admin.site.urls),
24 |     # Everything under dsserver/ is routed by speech_server_main.urls.
25 |     url(r'^dsserver/', include('speech_server_main.urls')),
26 | ]
25 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Ashwani Pandey
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/speech_server_main/views.py:
--------------------------------------------------------------------------------
 1 | from django.shortcuts import render
 2 | from django.views.decorators.csrf import ensure_csrf_cookie, csrf_exempt
 3 | from django.http.response import HttpResponse
 4 | from datetime import datetime
 5 | from .deepspeech import deepspeech as ds
 6 | from speech_server_main.config import config
 7 | from speech_server_main import logging
 8 | 
 9 | # Shared accessor for configuration values, used by handle_audio below.
10 | conf = config.ConfigDeepSpeech()
10 | 
11 | @ensure_csrf_cookie
12 | def index(request):
13 |     return render(request, 'speech_server_main/index.html')
14 | 
15 | @csrf_exempt
16 | def handle_audio(request):
17 |     try:
18 |         data=request.body
19 |         audiofiledir = conf.get_config('audiofiledir')
20 |         file_name = audiofiledir + 'http_generated_' + datetime.now().strftime('%y-%m-%d_%H%M%S')
21 |         logging.log("file name: {0}".format(file_name), "debug")
22 |         with open(file_name, 'wb') as f:
23 |             f.write(data)
24 |         
25 |         msg = ds.stt(file_name)
26 |     except Exception as err:
27 |         logging.log("exception occurred in handle_audio: {0}".format(err), "error")
28 |         msg = "failed"
29 |     return HttpResponse(msg)
30 | 


--------------------------------------------------------------------------------
/speech_server_main/consumers.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Channel consumers
 3 | '''
 4 | 
 5 | from datetime import datetime
 6 | from speech_server_main.config import config
 7 | from .deepspeech import deepspeech as ds
 8 | import os, re
 9 | 
10 | conf = config.ConfigDeepSpeech()
11 | 
12 | # Directory where incoming websocket audio is written; read once at import time.
13 | audiofiledir = conf.get_config('audiofiledir')
13 | def ws_connect(message):
14 |     # Accept the incoming connection
15 |     message.reply_channel.send({"accept": True})
16 |     print('websocket connected')
17 | 
18 | def ws_message(message):
19 |     print('ws message received')
20 |     file_name = audiofiledir + 'ws_generated' + datetime.now().strftime('%y-%m-%d_%H%M%S')
21 |     with open(file_name, 'wb') as fp:
22 |         fp.write(message.content['bytes'])
23 |         fp.flush()
24 |     msg = ds.stt(file_name, True)
25 |     message.reply_channel.send({'text':msg})
26 | 
27 | def ws_disconnect(message):
28 |     print('ws disconnected')
29 |     delete_files_in_dir('^ws_generated', audiofiledir)
30 | 
31 | def delete_files_in_dir(pattern, dir):
32 |     print('inside delete module')
33 |     for file in os.listdir(dir):
34 |         if re.search(pattern, file):
35 |             os.remove(os.path.join(dir, file))
36 | 


--------------------------------------------------------------------------------
/speech_server_main/static/speech_server_main/wav encoder license/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Yuji Miyane
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/speech_server_main/config/config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | from functools import lru_cache
 4 | 
 5 | class ConfigDeepSpeech:
 6 |     
 7 |     @lru_cache(maxsize=32)
 8 |     def get_config(self, key):
 9 |         print('inside module')
10 |         module_dir = os.path.dirname(__file__)  # get current directory
11 |         file_path = os.path.join(module_dir, 'config.json')
12 |         
13 |         with open(file_path, 'r') as f:
14 |             config = json.load(f)
15 |             
16 |         ds_config = config['deepspeech']
17 |         model = ds_config['model']
18 |         lm = ds_config['lm']
19 |         trie = ds_config['trie']
20 |         audiofiledir = ds_config['audiofiledir']
21 |         audiofilelength = ds_config['audiofilelength']
22 |         debug = ds_config['debug']
23 |         if key == 'model':
24 |             return model
25 |         elif key == 'lm':
26 |             return lm
27 |         elif key == 'trie':
28 |             return trie
29 |         elif key == 'audiofiledir':
30 |             return audiofiledir
31 |         elif key == 'audiofilelength':
32 |             return audiofilelength
33 |         elif key == 'debug':
34 |             return debug
35 |         else:
36 |             raise Exception('Invalid key value.')
37 |         


--------------------------------------------------------------------------------
/speech_server_main/apps.py:
--------------------------------------------------------------------------------
 1 | from django.apps import AppConfig
 2 | from deepspeech import Model
 3 | from speech_server_main.config import config
 4 | 
 5 | # These constants control the beam search decoder
 6 | 
 7 | # Beam width used in the CTC decoder when building candidate transcriptions
 8 | BEAM_WIDTH = 500
 9 | 
10 | # The alpha hyperparameter of the CTC decoder. Language Model weight
11 | LM_ALPHA = 0.75
12 | 
13 | # The beta hyperparameter of the CTC decoder. Word insertion bonus.
14 | LM_BETA = 1.85
15 | 
16 | # These constants are tied to the shape of the graph used (changing them changes
17 | # the geometry of the first layer), so make sure you use the same constants that
18 | # were used during training
19 | 
20 | # Number of MFCC features to use
21 | # N_FEATURES = 26
22 | 
23 | # Size of the context window used for producing timesteps in the input vector
24 | # N_CONTEXT = 9
25 | 
26 | 
27 | class SpeechServerMain(AppConfig):
28 |     """App config that also holds the shared DeepSpeech model as ``ds``.
29 | 
30 |     NOTE: the model is constructed in the class body, i.e. when this module
31 |     is imported, not inside ``ready()``.
32 |     """
33 |     name = 'speech_server_main'
34 |     # Model, language model and trie paths are read from the config accessor.
35 |     conf = config.ConfigDeepSpeech()
36 |     model = conf.get_config('model')
37 |     lm = conf.get_config('lm')
38 |     trie = conf.get_config('trie')
39 | 
40 |     # Class attribute, so other modules can reach it as SpeechServerMain.ds.
41 |     ds = Model(model, BEAM_WIDTH)
42 |     # The language-model decoder is enabled only when both paths are non-empty.
43 |     if lm and trie:
44 |         ds.enableDecoderWithLM(lm, trie, LM_ALPHA, LM_BETA)
45 | 
46 |     def ready(self):
47 |         """Print a start-up message; no other initialisation happens here."""
48 |         print("Deepspeech Server Initialization")
40 | 


--------------------------------------------------------------------------------
/speech_server_main/static/speech_server_main/audioRecorderWorker.js:
--------------------------------------------------------------------------------
 1 | importScripts('/static/speech_server_main/resampler.js');
 2 | importScripts('/static/speech_server_main/WavAudioEncoder.js');
 3 | 
 4 | // Worker-level state. recLength, recBuffersL and bits are not referenced
 5 | // anywhere else in this file.
 6 | var recLength = 0;
 7 | var recBuffersL = [];
 8 | var bits = 16;
 9 | // The three below are assigned in init().
10 | var sampleRate;
11 | var encoder;
12 | var resampler;
10 | 
11 | this.onmessage = function(e){
12 |   switch(e.data.command){
13 |     case 'init':
14 |       init(e.data.config);
15 |       break;
16 |     case 'record':
17 |       record(e.data.buffer);
18 |       break;
19 |     case 'exportWAV':
20 |       exportWAV(e.data.type, e.data.doCleanup);
21 |       break;
22 |     case 'clear':
23 |       clear();
24 |       break;
25 |   }
26 | };
27 | 
28 | function init(config){
29 | 	var contextSampleRate = config.contextSampleRate;
30 | 	sampleRate = config.desiredSampleRate;
31 | 	encoder = new WavAudioEncoder(sampleRate, 1);
32 | 	resampler = new Resampler(contextSampleRate, sampleRate, 1, 4096);
33 | }
34 | 
35 | function record(inputBuffer) {
36 | 	if(typeof resampler !== 'undefined'){		
37 | 		inputBuffer[0] = resampler.resampler(inputBuffer[0]);
38 | 	}
39 | 	encoder.encode(inputBuffer);
40 | }
41 | 
42 | function exportWAV(type, doCleanup) {
43 | 	var audioBlob = encoder.finish(type, doCleanup);
44 | 	this.postMessage(audioBlob);
45 | }
46 | 
47 | // Discard the audio collected so far by cancelling the encoder.
48 | function clear() {
49 | 	encoder.cancel();
50 | }
50 | 
51 | 


--------------------------------------------------------------------------------
/speech_server_main/deepspeech/deepspeech.py:
--------------------------------------------------------------------------------
 1 | import scipy.io.wavfile as wav
 2 | from speech_server_main.apps import SpeechServerMain
 3 | from speech_server_main.config import config
 4 | from speech_server_main import logging
 5 | 
 6 | # Maximum accepted audio duration in seconds, from the "audiofilelength" config value.
 7 | audiolength = float(config.ConfigDeepSpeech().get_config("audiofilelength"))
 7 | 
 8 | def stt(audioPath, from_websocket=False):
 9 |     try:
10 |         logging.log("Inside deepspeech.stt function", "info")
11 |         text = ""
12 |         fs, audio = wav.read(audioPath)
13 |         if fs == 16000:
14 |             if from_websocket or check_audio_lenth(len(audio)):
15 |                 logging.log("Starting transcribing...", "info")
16 |                 text = SpeechServerMain.ds.stt(audio)
17 |                 logging.log("Audio transcribed.", "info")
18 |             elif not from_websocket:
19 |                 text = "Audio should be less than " + str(audiolength) + " seconds."
20 |         else:
21 |             text = "Frame rate of submitted audio should be 16000 kHz."
22 |         #print('after inference: %s' % text)
23 |     except Exception as err:
24 |         logging.log("exception occurred: {0}".format(err), "error")
25 |         text = "Some error occurred while transcribing."
26 | 
27 |     return text
28 | 
29 | def check_audio_lenth(len_audio):
30 |     len_audio = len_audio / 16000
31 |     if len_audio > audiolength:
32 |         return False;
33 |     else:
34 |         return True;
35 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 | 
60 | # Scrapy stuff:
61 | .scrapy
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | 
66 | # PyBuilder
67 | target/
68 | 
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 | 
72 | # pyenv
73 | .python-version
74 | 
75 | # celery beat schedule file
76 | celerybeat-schedule
77 | 
78 | # dotenv
79 | .env
80 | 
81 | # virtualenv
82 | venv/
83 | ENV/
84 | 
85 | # Spyder project settings
86 | .spyderproject
87 | 
88 | # Rope project settings
89 | .ropeproject
90 | .idea
91 | 
92 | data/
93 | 


--------------------------------------------------------------------------------
/speech_server_main/static/speech_server_main/audioRecorder.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Record audio
 3 |  */
 4 | (function(window){
 5 | 	
 6 | 	// Recorder bound to one audio source node. Audio chunks are forwarded to
 7 | 	// a worker, which later returns the finished recording as a blob.
 8 | 	function AudioRecorderObject(source) {
 9 | 		var callback;          // set by exportWAV(); receives the blob from the worker
10 | 		var recording = false; // chunks are forwarded only while this is true
11 | 		
12 | 		this.context = source.context;
13 | 		// Use whichever script-processor factory the context provides.
14 | 		this.node = (this.context.createScriptProcessor ||
15 | 				this.context.createJavaScriptNode).call(this.context, 4096, 2, 2);
16 | 		var worker = new Worker('/static/speech_server_main/audioRecorderWorker.js');
17 | 		
18 | 		// The worker posts the blob back; pass it to the stored callback.
19 | 		worker.onmessage = function(e){
20 | 			var blob = e.data;
21 | 			callback(blob);
22 | 		};
23 | 		
24 | 		// Tell the worker the context's rate and the rate to encode at.
25 | 		worker.postMessage({
26 | 			command: 'init',
27 | 			config: {
28 | 				contextSampleRate: this.context.sampleRate,
29 | 				desiredSampleRate: 16000,
30 | 			}
31 | 		});
32 | 		
33 | 		// Start forwarding audio chunks to the worker.
34 | 		this.record = function(){
35 | 			recording = true;
36 | 		};
37 | 
38 | 		// Stop forwarding audio chunks.
39 | 		this.stop = function(){
40 | 			recording = false;
41 | 		};
42 | 
43 | 		// Ask the worker to discard what it has collected.
44 | 		this.clear = function(){
45 | 			worker.postMessage({ command: 'clear' });
46 | 		};
47 | 		
48 | 		// Request the recording as a WAV blob; cb is called with the blob.
49 | 		// Throws when no callback is supplied.
50 | 		this.exportWAV = function(cb, doCleanup){
51 | 			callback = cb;
52 | 			if (!callback) throw new Error('Unable to set callback function. Please check if provided.');
53 | 
54 | 			worker.postMessage({
55 | 				command: 'exportWAV',
56 | 				type: 'audio/wav',
57 | 				doCleanup: doCleanup,
58 | 			});
59 | 		};
60 | 		
61 | 		// While recording, send channel 0 of every processed chunk to the worker.
62 | 		this.node.onaudioprocess = function(e){
63 | 			if (!recording) return;
64 | 
65 | 
66 | 			worker.postMessage({
67 | 				command: 'record',
68 | 				buffer: [
69 | 					e.inputBuffer.getChannelData(0),
70 | 				]
71 | 			});
72 | 		};
73 | 
74 | 		source.connect(this.node);
75 | 		// NOTE(review): the author was unsure whether this connection is needed — confirm.
76 | 		this.node.connect(this.context.destination); //need to check if this is required.
77 | 		
78 | 	}
79 | 	
80 | 	// Public entry point, exposed as window.audioRecorder.
81 | 	var audioRecorder =  {
82 | 
83 | 			  fromSource: function(src){
84 | 				 return new AudioRecorderObject(src);
85 | 		}
86 | 	};
87 | 	
88 | 	window.audioRecorder = audioRecorder;
89 | 	
90 | })(window);


--------------------------------------------------------------------------------
/speech_server_main/templates/speech_server_main/index.html:
--------------------------------------------------------------------------------
 1 | {% extends "speech_server_main/base.html" %}
 2 | {% block content %}
 3 | 
 4 | {# Load the tag library #}
 5 | {% load bootstrap3 %}
 6 | <h2>Deepspeech Server</h2>
 7 | This is a Mozilla DeepSpeech server implemented in Django.
 8 | You can record audio in the browser or choose an audio file, then submit it to get the corresponding text.
 9 | 
10 | <div class="form-main">
11 | 	<div>
12 | 		<label>Use: </label>
13 | 		<label class="radio-inline" for="http-radio">
14 | 			<input class="form-check-input" type="radio" id="http-radio" value="HTTP/HTTPS" name="protocol" checked="checked" onchange="protocolHandler();"/> Use HTTP/HTTPS protocol&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
15 | 		</label>
16 | 		<label class="radio-inline" for="ws-radio">
17 | 			<input class="form-check-input" type="radio" id="ws-radio" value="ws" name="protocol" onchange="protocolHandler();"/> Use websockets
18 | 		</label>
19 | 	</div>
20 |     <div class="form-group">
21 |         <label for="player">Record your voice</label>
22 |         <div id="player">
23 |             <audio controls src=""></audio>
24 |         </div>
25 |         <button onclick="startRecording()" class="btn btn-danger">Record</button>
26 |         <button onclick="stopRecording()" class="btn btn-info">Stop</button>
27 |     </div>
28 |     <div class="form-group">
29 |         <label for="file">Or choose audio file</label>
30 |         <input type="file" class="form-control-file" id="file"/>
31 |     </div>
32 |     <button onclick="submitToServer()" id="submitAudio"
33 |             class="btn btn-primary">Submit Audio</button>
34 | </div>
35 | <div class="result_container form-group">
36 |     <label for="result">Result</label>
37 |     <textarea class="form-control" id="result" rows="3"></textarea>
38 | </div>
39 | 
40 | <div id="error-panel" class="alert" role="alert">
41 |     <a id="error-message"></a>
42 | </div>
43 | 
44 | <div id="progress-panel">
45 |     <div class="progress-container">
46 |         <div class="progress progress-striped active">
47 |             <div class="progress-bar progress-bar-success" style="width:0%"></div>
48 |         </div>
49 |     </div>
50 | </div>
51 | 
52 | {% endblock %}
53 | 


--------------------------------------------------------------------------------
/speech_server_main/static/speech_server_main/WavAudioEncoder.js:
--------------------------------------------------------------------------------
 1 | (function(self) {
 2 |   var min = Math.min,
 3 |       max = Math.max;
 4 | 
 5 |   var setString = function(view, offset, str) {
 6 |     var len = str.length;
 7 |     for (var i = 0; i < len; ++i)
 8 |       view.setUint8(offset + i, str.charCodeAt(i));
 9 |   };
10 | 
11 |   var Encoder = function(sampleRate, numChannels) {
12 |     this.sampleRate = sampleRate;
13 |     this.numChannels = numChannels;
14 |     this.numSamples = 0;
15 |     this.dataViews = [];
16 |   };
17 | 
18 |   Encoder.prototype.encode = function(buffer) {
19 |     var len = buffer[0].length,
20 |         nCh = this.numChannels,
21 |         view = new DataView(new ArrayBuffer(len * nCh * 2)),
22 |         offset = 0;
23 |     for (var i = 0; i < len; ++i)
24 |       for (var ch = 0; ch < nCh; ++ch) {
25 |         var x = buffer[ch][i] * 0x7fff;
26 |         view.setInt16(offset, x < 0 ? max(x, -0x8000) : min(x, 0x7fff), true);
27 |         offset += 2;
28 |       }
29 |     this.dataViews.push(view);
30 |     this.numSamples += len;
31 |   };
32 | 
33 |   Encoder.prototype.finish = function(mimeType, doCleanup) {
34 |     var dataSize = this.numChannels * this.numSamples * 2,
35 |         view = new DataView(new ArrayBuffer(44));
36 |     setString(view, 0, 'RIFF');
37 |     view.setUint32(4, 36 + dataSize, true);
38 |     setString(view, 8, 'WAVE');
39 |     setString(view, 12, 'fmt ');
40 |     view.setUint32(16, 16, true);
41 |     view.setUint16(20, 1, true);
42 |     view.setUint16(22, this.numChannels, true);
43 |     view.setUint32(24, this.sampleRate, true);
44 |     view.setUint32(28, this.sampleRate * 4, true);
45 |     view.setUint16(32, this.numChannels * 2, true);
46 |     view.setUint16(34, 16, true);
47 |     setString(view, 36, 'data');
48 |     view.setUint32(40, dataSize, true);
49 |     this.dataViews.unshift(view);
50 |     var blob = new Blob(this.dataViews, { type: 'audio/wav' });
51 |     if(doCleanup){
52 |     	this.cleanup();
53 |     }
54 |     return blob;
55 |   };
56 | 
57 |   Encoder.prototype.cancel = Encoder.prototype.cleanup = function() {
58 |     delete this.dataViews;
59 |   };
60 | 
61 |   self.WavAudioEncoder = Encoder;
62 | })(self);
63 | 


--------------------------------------------------------------------------------
/speech_server_main/templates/speech_server_main/base.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <meta http-equiv="X-UA-Compatible" content="IE=edge">
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1">
 7 |     <title>Deepspeech</title>
 8 |     <script
 9 |             src="https://code.jquery.com/jquery-3.2.1.min.js"
10 |             integrity="sha256-hwg4gsxgFZhOsEEamdOYGBf13FyQuiTwlAQgxVSNgt4="
11 |             crossorigin="anonymous">
12 |     </script>
13 |     <script src="/static/speech_server_main/script.js" type="text/javascript"></script>
14 |     <script src="/static/speech_server_main/audioRecorder.js" type="text/javascript"></script>
15 |     <link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css"
16 |           rel="stylesheet" integrity="sha384-wvfXpqpZZVQGK6TAh5PVlGOfQNHSoD2xbE+QkPxCAFlNEevoEH3Sl0sibVcOQVnN"
17 |           crossorigin="anonymous">
18 |     <link rel="stylesheet" href="/static/css/theme.css">
19 |     <link rel="stylesheet" href="/static/css/main.css">
20 |     {% load bootstrap3 %}
21 |     {% bootstrap_css %}
22 |     {% bootstrap_javascript %}
23 | 
24 |     {# Display django.contrib.messages as Bootstrap alerts #}
25 |     {% bootstrap_messages %}
26 | </head>
27 | <body>
28 | 
29 | <nav class="navbar navbar-inverse navbar-fixed-top">
30 |     <div class="container">
31 |         <div class="navbar-header">
32 |             <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
33 |                 <span class="sr-only">Toggle navigation</span>
34 |                 <span class="icon-bar"></span>
35 |                 <span class="icon-bar"></span>
36 |                 <span class="icon-bar"></span>
37 |             </button>
38 |             <a class="navbar-brand" href="#">Deepspeech</a>
39 |         </div>
40 |         <div id="navbar" class="collapse navbar-collapse">
41 |             <ul class="nav navbar-nav">
42 |                 <li class="active"><a href="#">Home</a></li>
43 |                 <li><a href="#about">About</a></li>
44 |             </ul>
45 |         </div><!--/.nav-collapse -->
46 |     </div>
47 | </nav>
48 | 
49 | <div class="container">
50 |     {% block content %}
51 |     {% endblock %}
52 | </div>
53 | </body>
54 | </html>
55 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # django-deepspeech-server
This is a [Mozilla DeepSpeech](https://github.com/mozilla/DeepSpeech) server implemented in Django. You can record sound in the browser or upload a compatible WAV file and submit it to get the corresponding text. It supports both HTTP/HTTPS and WebSockets (ws).<br />
 3 | **Note:** For good results using websockets, deepspeech server should have GPU for higher inference rate and SSD is better as it promotes fast disk I/O.
 4 | 
 5 | ## Acknowledgement
First of all, thanks to Mozilla for such an awesome project. Speech to text is a revolutionary technology that has huge scope in the future, and these types of open source efforts will definitely help nurture this tech.
 7 | I have used [wav-encoder](https://github.com/higuma/wav-audio-encoder-js) to encode recorded sound in wav format and [resampler](https://gist.github.com/frequent/34313277a46d5fb050f94a3769804287) to get 16000 Hz sample rate. Got some of my inspiration from [deepspeech-server](https://github.com/MainRo/deepspeech-server).
 8 | 
 9 | ## Installation
10 | Download or clone this project. This project uses python3. To run this project you need to first install deepspeech. Check out [deepspeech's README.md](https://github.com/mozilla/DeepSpeech/blob/master/README.rst) for details on how to install deepspeech on your machine.
11 |     
12 | Once deepspeech is installed, then run following command to install required dependencies of **django-deepspeech-server:**
13 | 
14 |     pip3 install -r path/to/django-deepspeech-server/requirements.txt
15 | 
16 | ## Configuration
Enter the paths for your model, alphabet, lm and trie in the speech_server_main/config/config.json file. Also change the **audiofiledir** key in the same config.json file to a valid path on your system. You can also limit audio length by setting [**audiofilelength**](https://github.com/ashwan1/django-deepspeech-server/issues/7) to a duration in seconds.
18 | 
19 | Go to directory where manage.py is located and start server:
20 | 
21 |     python3 manage.py runserver
22 |     
23 | Go to your browser and browse to http://127.0.0.1:8000/dsserver.
Alternatively, you can use the HTTPS server with the command below:
25 | 
26 |     python3 manage.py runsslserver
27 | 
28 | Now you can access website over https (https://127.0.0.1:8000).
29 | 
30 | ## TODO
31 | - [x] Support for web sockets.
32 | - [x] Input file validation.
33 | - [ ] Real time inference.
34 | - [ ] Provide Google speech API like response, so that one only has to change websocket address.
35 | 
36 | ## License
37 | MIT(see [LICENSE](https://github.com/sci472bmt/django-deepspeech-server/blob/master/LICENSE))
38 | 


--------------------------------------------------------------------------------
/SpeechServer/settings.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Django settings for SpeechServer project.
  3 | 
  4 | Generated by 'django-admin startproject' using Django 2.0.
  5 | 
  6 | For more information on this file, see
  7 | https://docs.djangoproject.com/en/2.0/topics/settings/
  8 | 
  9 | For the full list of settings and their values, see
 10 | https://docs.djangoproject.com/en/2.0/ref/settings/
 11 | """
 12 | 
 13 | import os
 14 | 
 15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
 16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 17 | 
 18 | 
 19 | # Quick-start development settings - unsuitable for production
 20 | # See https://docs.djangoproject.com/en/2.0/howto/deployment/checklist/
 21 | 
 22 | # SECURITY WARNING: keep the secret key used in production secret!
 23 | SECRET_KEY = '#-kfa^tjc6@bdpc5)d^yveabzd7_j!$ii5_ish66=cc!o3!bih'
 24 | 
 25 | # SECURITY WARNING: don't run with debug turned on in production!
 26 | DEBUG = True
 27 | 
 28 | ALLOWED_HOSTS = ['127.0.0.1', '10.210.22.166']
 29 | 
 30 | 
 31 | # Application definition
 32 | 
 33 | INSTALLED_APPS = [
 34 |     'corsheaders',
 35 |     'sslserver',
 36 |     'bootstrap3',
 37 |     'speech_server_main.apps.SpeechServerMain',
 38 |     'django.contrib.admin',
 39 |     'django.contrib.auth',
 40 |     'django.contrib.contenttypes',
 41 |     'django.contrib.sessions',
 42 |     'django.contrib.messages',
 43 |     'django.contrib.staticfiles',
 44 |     'channels',
 45 | ]
 46 | 
 47 | MIDDLEWARE = [
 48 |     'corsheaders.middleware.CorsMiddleware',
 49 |     'django.middleware.security.SecurityMiddleware',
 50 |     'django.contrib.sessions.middleware.SessionMiddleware',
 51 |     'django.middleware.common.CommonMiddleware',
 52 |     'django.middleware.csrf.CsrfViewMiddleware',
 53 |     'django.contrib.auth.middleware.AuthenticationMiddleware',
 54 |     'django.contrib.messages.middleware.MessageMiddleware',
 55 |     'django.middleware.clickjacking.XFrameOptionsMiddleware',
 56 | ]
 57 | 
 58 | CORS_ORIGIN_WHITELIST = (
 59 |     'localhost:8443',
 60 |     '127.0.0.1:8443',
 61 |     '10.210.22.166:8443'
 62 | )
 63 | 
 64 | CORS_ALLOW_HEADERS = (
 65 |     'accept',
 66 |     'accept-encoding',
 67 |     'authorization',
 68 |     'content-type',
 69 |     'dnt',
 70 |     'origin',
 71 |     'user-agent',
 72 |     'x-csrftoken',
 73 |     'x-requested-with',
 74 |     'x-csrf-token',
 75 |     'cip',
 76 |     'isajaxrequest',
 77 | )
 78 | 
 79 | # CORS_URLS_REGEX = r'^/handleaudio/.*$'
 80 | 
 81 | #settings for channels - Begin
 82 | 
 83 | CHANNEL_LAYERS = {
 84 |     "default": {
 85 |         "BACKEND": "asgiref.inmemory.ChannelLayer",
 86 |         "ROUTING": "speech_server_main.routing.channel_routing",
 87 |     },
 88 | }
 89 | 
#settings for channels - End
 91 | 
 92 | ROOT_URLCONF = 'SpeechServer.urls'
 93 | 
 94 | TEMPLATES = [
 95 |     {
 96 |         'BACKEND': 'django.template.backends.django.DjangoTemplates',
 97 |         'DIRS': [],
 98 |         'APP_DIRS': True,
 99 |         'OPTIONS': {
100 |             'context_processors': [
101 |                 'django.template.context_processors.debug',
102 |                 'django.template.context_processors.request',
103 |                 'django.contrib.auth.context_processors.auth',
104 |                 'django.contrib.messages.context_processors.messages',
105 |             ],
106 |         },
107 |     },
108 | ]
109 | 
110 | WSGI_APPLICATION = 'SpeechServer.wsgi.application'
111 | 
112 | 
113 | # Database
114 | # https://docs.djangoproject.com/en/2.0/ref/settings/#databases
115 | 
116 | DATABASES = {
117 |     'default': {
118 |         'ENGINE': 'django.db.backends.sqlite3',
119 |         'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
120 |     }
121 | }
122 | 
123 | 
124 | # Password validation
125 | # https://docs.djangoproject.com/en/2.0/ref/settings/#auth-password-validators
126 | 
127 | AUTH_PASSWORD_VALIDATORS = [
128 |     {
129 |         'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
130 |     },
131 |     {
132 |         'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
133 |     },
134 |     {
135 |         'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
136 |     },
137 |     {
138 |         'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
139 |     },
140 | ]
141 | 
142 | 
143 | # Internationalization
144 | # https://docs.djangoproject.com/en/2.0/topics/i18n/
145 | 
146 | LANGUAGE_CODE = 'en-us'
147 | 
148 | TIME_ZONE = 'UTC'
149 | 
150 | USE_I18N = True
151 | 
152 | USE_L10N = True
153 | 
154 | USE_TZ = True
155 | 
156 | 
157 | # Static files (CSS, JavaScript, Images)
158 | # https://docs.djangoproject.com/en/2.0/howto/static-files/
159 | 
160 | STATIC_URL = '/static/'
161 | 


--------------------------------------------------------------------------------
/speech_server_main/static/speech_server_main/script.js:
--------------------------------------------------------------------------------
  1 | //(function(){
  2 |     'use strict'
  3 | 
    // Shared page state used by the recording, upload and websocket helpers below.

    // Constraints passed to getUserMedia(): request audio only.
    var constraints = {
            audio : true,
    };
    // Recorder returned by audioRecorder.fromSource(); set in startRecording().
    var recorder = null;
    // MediaStream obtained from getUserMedia(); reset to null in stopRecording().
    var audioStream = null;
    // Latest audio payload: a recorded WAV blob or a file chosen via #file.
    var audioData = null;
    // AudioContext, created on the first recording and reused afterwards.
    var audioContext = null;
    // CSRF token read from the cookie at load time; sent as X-CSRFTOKEN on upload.
    var csrftoken = getCookie('csrftoken');
    // Active WebSocket, or null when no connection is held.
    var socket = null;
    // Timer id of the periodic export started when the websocket opens.
    var interval;
 14 | 
 15 |     function getCookie(name) {
 16 |       var cookieValue = null;
 17 |       if (document.cookie && document.cookie != '') {
 18 |           var cookies = document.cookie.split(';');
 19 |           for (var i = 0; i < cookies.length; i++) {
 20 |               var cookie = cookies[i].trim();
 21 |               // Does this cookie string begin with the name we want?
 22 |               if (cookie.substring(0, name.length + 1) == (name + '=')) {
 23 |                   cookieValue = decodeURIComponent(cookie.substring(name.length + 1));
 24 |                   break;
 25 |               }
 26 |           }
 27 |       }
 28 |       return cookieValue;
 29 |   }
 30 | 
 31 |     function protocolHandler(){
 32 |     	if($('#ws-radio').prop('checked')){
 33 |     		$('#file').prop('disabled', true);
 34 |     		$('#submitAudio').prop('disabled', true);
 35 |     	} else {
 36 |     		$('#file').prop('disabled', false);
 37 |     		$('#submitAudio').prop('disabled', false);
 38 |     	}
 39 |     }
 40 | 
 41 |     function initWebSocket(){
 42 |     	if(!socket){
 43 |     		socket = new WebSocket('ws://127.0.0.1:8000/dsserver/');
 44 | 
 45 |     		socket.onopen = function(){
 46 |     			interval = setInterval(function(){
 47 |     				recorder.exportWAV(function(blob){
 48 |     		            audioData = blob;
 49 |     		            if(socket && socket.readyState == WebSocket.OPEN){
 50 |     		            	socket.send(audioData);
 51 |     		            }
 52 |     		        }, false);
 53 |     			}, 2000);
 54 |     		}
 55 | 
 56 |     		socket.onmessage = function(res){
 57 |     			$('#result').text(res.data);
 58 |     		}
 59 | 
 60 |     		socket.onerror = function(error){
 61 |     			alert('web socket error: ' + error);
 62 |     		}
 63 | 
 64 |     		socket.onclose = function(e){
 65 |     			clearInterval(interval);
 66 |     			console.log('websocket closed');
 67 |     		}
 68 | 
 69 |     	}
 70 |     }
 71 | 
 72 |     function closeWebSocket(){
 73 |     	if(socket && socket.readyState != WebSocket.CLOSED){
 74 |     		socket.close();
 75 |     	}
 76 | 		socket = null;
 77 |     }
 78 | 
 79 |     function startRecording(){
 80 |     	$("#file").val("");
 81 |     	if (navigator.mediaDevices.getUserMedia === undefined) {
 82 |     		displayError("This browser doesn't support getUserMedia.");
 83 |     	}
 84 |         navigator.mediaDevices.getUserMedia(constraints)
 85 |         .then(function(stream){
 86 |         	audioStream = stream;
 87 |             if(!audioContext){
 88 |                 audioContext = new AudioContext();
 89 |             }
 90 |             var source = audioContext.createMediaStreamSource(stream);
 91 |             recorder = audioRecorder.fromSource(source);
 92 |             recorder.record();
 93 |             if($('#ws-radio').prop('checked') && !socket){
 94 |             	initWebSocket();
 95 |             } else if(socket){
 96 |             	closeWebSocket();
 97 |             }
 98 |         })
 99 |         .catch(function(err){
100 |         	displayError("Error occurred while getting audio stream: " + err);
101 |         })
102 |     }
103 | 
104 |     function stopRecording(){
105 |     	recorder.stop();
106 |     	clearInterval(interval);
107 |         recorder.exportWAV(function(blob){
108 |             audioStream.getTracks()[0].stop();
109 |             audioStream = null;
110 |             audioData = blob;
111 |             var url = URL.createObjectURL(blob);
112 |             var mt = document.createElement('audio');
113 |             mt.controls = true;
114 |             mt.src = url;
115 |             $('#player')[0].innerHTML = "";
116 |             $('#player').append(mt);
117 |             if(socket && socket.readyState == WebSocket.OPEN){
118 |             	socket.send(audioData);
119 |             	closeWebSocket();
120 |             }
121 |         }, true);
122 |         recorder.clear();
123 |     }
124 | 
125 |     function submitToServer(){
126 |         if(audioData == null) {
127 |             displayError("There is no audio data here!");
128 |             return;
129 |         }
130 | 
131 |         $('#error-panel').hide();
132 |         $('#progress-panel').show();
133 |         $('.progress-bar').css('width', '0%').attr('aria-valuenow', 0);
134 |         $('.progress-bar').animate({
135 |             width: "100%"
136 |         }, 1500);
137 |         $.ajax({
138 |           url: "/dsserver/handleaudio/",
139 |           type: "POST",
140 |           contentType: 'application/octet-stream',
141 |           data: audioData,
142 |           processData: false,
143 |           headers: {
144 |             'X-CSRFTOKEN': csrftoken
145 |           },
146 |           success: function(response){
147 |             $('#result').text(response);
148 |             $('#progress-panel').hide();
149 |           },
150 |           error: function(response){
151 |             $('#result').text(response.responseText);
152 |             $('#progress-panel').hide();
153 |           }
154 |         });
155 |     }
156 | 
157 |     var openFile = function(event) {
158 |         var input = event.target;
159 |         var isValid = checkValidity(input.files[0]);
160 |         if(!isValid){
161 |         	displayError("Only wav file type allowed.");
162 |         	return;
163 |         }
164 |         var url = URL.createObjectURL(input.files[0]);
165 |         var mt = document.createElement('audio');
166 |         audioData = input.files[0];
167 |         mt.controls = true;
168 |         mt.src = url;
169 |         $('#player')[0].innerHTML = "";
170 |         $('#player').append(mt);
171 |     };
172 |     
173 |     function checkValidity(file){
174 |     	var isValid = false;
175 |     	var allowedFileTypes = ['audio/x-wav', 'audio/wav'];
176 |     	isValid = allowedFileTypes.includes(file.type);
177 |     	return isValid;
178 |     }
179 |     
180 |     function displayError(errorMsg){
181 |     	$('#error-panel').addClass('alert-danger');
182 |         $('#error-message').text(errorMsg);
183 |         $('#error-panel').show();
184 |     }
185 | 
186 |     $(window).on('load',function(){
187 |     	$("#file").val("");
188 |     	$("#file").change(openFile);
189 |     });
190 | 
191 | //})())
192 | 


--------------------------------------------------------------------------------
/speech_server_main/static/speech_server_main/resampler.js:
--------------------------------------------------------------------------------
  1 | /*jslint nomen: true, indent: 2, maxerr: 3 */
  2 | /*global self, buffer */
  3 | (function (worker_instance) {
  4 |   "use strict";
  5 | 
  6 |   //JavaScript Audio Resampler (c) 2011 - Grant Galitz
  7 | 
  8 |   var INCORRECT_BUFFER_LENGTH = "Buffer was of incorrect sample length.";
  9 |   var INCORRECT_SETTINGS = "Invalid settings specified for the resampler.";
 10 | 
 11 |   function Resampler(fromSampleRate, toSampleRate, channels, outputBufferSize, noReturn) {
 12 | 
 13 |     if (!fromSampleRate || !toSampleRate || !channels) {
 14 |       throw(new Error(INCORRECT_SETTINGS));
 15 |     }
 16 | 
 17 |     this.fromSampleRate = fromSampleRate;
 18 |     this.toSampleRate = toSampleRate;
 19 |     this.channels = channels || 0;
 20 |     this.outputBufferSize = outputBufferSize;
 21 |     this.noReturn = !!noReturn;
 22 | 
 23 |     this.initialize();
 24 |   }
 25 | 
 26 |   Resampler.prototype.bypassResampler = function (buffer) {
 27 |     
 28 |     // set the buffer passed as our own, as we don't need to resample it
 29 |     if (this.noReturn) {
 30 |       this.outputBuffer = buffer;
 31 |       return buffer.length;
 32 |     }
 33 |     // just return the buffer passsed
 34 |     return buffer;
 35 |   };
 36 | 
 37 |   Resampler.prototype.initialize = function () {
 38 |     if (this.fromSampleRate == this.toSampleRate) {
 39 | 
 40 |       // Setup resampler bypass - Resampler just returns what was passed through
 41 |       this.resampler = this.bypassResampler;
 42 |       this.ratioWeight = 1;
 43 | 
 44 |     } else {
 45 |       
 46 |       if (this.fromSampleRate < this.toSampleRate) {
 47 | 
 48 |         // Use generic linear interpolation if upsampling,
 49 |         // as linear interpolation produces a gradient that we want
 50 |         // and works fine with two input sample points per output in this case.
 51 |         this.linearInterpolation();
 52 |         this.lastWeight = 1;
 53 | 
 54 |       } else {
 55 |         
 56 |         // Custom resampler I wrote that doesn't skip samples
 57 |         // like standard linear interpolation in high downsampling.
 58 |         // This is more accurate than linear interpolation on downsampling.
 59 |         this.multiTap();
 60 |         this.tailExists = false;
 61 |         this.lastWeight = 0;
 62 |       }
 63 | 
 64 |       // Initialize the internal buffer:
 65 |       this.initializeBuffers();
 66 |       this.ratioWeight = this.fromSampleRate / this.toSampleRate;
 67 |     }
 68 |   };
 69 | 
  /**
   * Produce the result of a resampling call from the internal output buffer.
   *
   * @param {number} sliceAmount Number of valid samples in this.outputBuffer.
   * @returns {number|Array|Float32Array} sliceAmount itself in noReturn mode,
   *          otherwise the first sliceAmount samples of this.outputBuffer.
   */
  Resampler.prototype.bufferSlice = function (sliceAmount) {

    // If we're going to access the properties directly from this object:
    // the caller reads this.outputBuffer and only needs the valid length.
    if (this.noReturn) {
      return sliceAmount;
    }

    //Typed array and normal array buffer section referencing:
    try {
      // Typed arrays: a view over the first sliceAmount samples.
      return this.outputBuffer.subarray(0, sliceAmount);
    }
    catch (error) {
      try {
        //Regular array pass:
        // No subarray() available; truncate the array in place instead.
        this.outputBuffer.length = sliceAmount;
        return this.outputBuffer;
      }
      catch (error) {
        //Nightly Firefox 4 used to have the subarray function named as slice:
        return this.outputBuffer.slice(0, sliceAmount);
      }
    }
  };
 93 | 
  /**
   * Allocate the output buffer (outputBufferSize entries) and the
   * per-channel carry-over buffer (one entry per channel).
   * Falls back to plain arrays when the typed arrays cannot be created.
   */
  Resampler.prototype.initializeBuffers = function () {
    try {
      this.outputBuffer = new Float32Array(this.outputBufferSize);
      this.lastOutput = new Float32Array(this.channels);
    }
    catch (error) {
      // Typed arrays unavailable or the size was rejected: use growable arrays.
      this.outputBuffer = [];
      this.lastOutput = [];
    }
  };
104 | 
105 |   Resampler.prototype.linearInterpolation = function () {
106 |     this.resampler = function (buffer) {
107 |       var bufferLength = buffer.length,
108 |         channels = this.channels,
109 |         outLength,
110 |         ratioWeight,
111 |         weight,
112 |         firstWeight,
113 |         secondWeight,
114 |         sourceOffset,
115 |         outputOffset,
116 |         outputBuffer,
117 |         channel;
118 | 
119 |       if ((bufferLength % channels) !== 0) {
120 |         throw(new Error(INCORRECT_BUFFER_LENGTH));
121 |       }
122 |       if (bufferLength <= 0) {
123 |         return (this.noReturn) ? 0 : [];
124 |       }
125 | 
126 |       outLength = this.outputBufferSize;
127 |       ratioWeight = this.ratioWeight;
128 |       weight = this.lastWeight;
129 |       firstWeight = 0;
130 |       secondWeight = 0;
131 |       sourceOffset = 0;
132 |       outputOffset = 0;
133 |       outputBuffer = this.outputBuffer;
134 | 
135 |       for (; weight < 1; weight += ratioWeight) {
136 |         secondWeight = weight % 1;
137 |         firstWeight = 1 - secondWeight;
138 |         this.lastWeight = weight % 1;
139 |         for (channel = 0; channel < this.channels; ++channel) {
140 |           outputBuffer[outputOffset++] = (this.lastOutput[channel] * firstWeight) + (buffer[channel] * secondWeight);
141 |         }
142 |       }
143 |       weight -= 1;
144 |       for (bufferLength -= channels, sourceOffset = Math.floor(weight) * channels; outputOffset < outLength && sourceOffset < bufferLength;) {
145 |         secondWeight = weight % 1;
146 |         firstWeight = 1 - secondWeight;
147 |         for (channel = 0; channel < this.channels; ++channel) {
148 |           outputBuffer[outputOffset++] = (buffer[sourceOffset((channel > 0) ? (" + " + channel) : "")] * firstWeight) + (buffer[sourceOffset(channels + channel)] * secondWeight);
149 |         }
150 |         weight += ratioWeight;
151 |         sourceOffset = Math.floor(weight) * channels;
152 |       }
153 |       for (channel = 0; channel < channels; ++channel) {
154 |         this.lastOutput[channel] = buffer[sourceOffset++];
155 |       }
156 |       return this.bufferSlice(outputOffset);
157 |     };
158 |   };
159 | 
160 |   Resampler.prototype.multiTap = function () {
161 |     this.resampler = function (buffer) {
162 |       var bufferLength = buffer.length,
163 |         outLength,
164 |         output_variable_list,
165 |         channels = this.channels,
166 |         ratioWeight,
167 |         weight,
168 |         channel,
169 |         actualPosition,
170 |         amountToNext,
171 |         alreadyProcessedTail,
172 |         outputBuffer,
173 |         outputOffset,
174 |         currentPosition;
175 | 
176 |       if ((bufferLength % channels) !== 0) {
177 |         throw(new Error(INCORRECT_BUFFER_LENGTH));
178 |       }
179 |       if (bufferLength <= 0) {
180 |         return (this.noReturn) ? 0 : [];
181 |       }
182 | 
183 |       outLength = this.outputBufferSize;
184 |       output_variable_list = [];
185 |       ratioWeight = this.ratioWeight;
186 |       weight = 0;
187 |       actualPosition = 0;  
188 |       amountToNext = 0;
189 |       alreadyProcessedTail = !this.tailExists;
190 |       this.tailExists = false;
191 |       outputBuffer = this.outputBuffer;
192 |       outputOffset = 0;
193 |       currentPosition = 0;
194 |             
195 |       for (channel = 0; channel < channels; ++channel) {
196 |         output_variable_list[channel] = 0;
197 |       }
198 | 
199 |       do {
200 |         if (alreadyProcessedTail) {
201 |           weight = ratioWeight;
202 |           for (channel = 0; channel < channels; ++channel) {
203 |             output_variable_list[channel] = 0;
204 |           }
205 |         } else {
206 |           weight = this.lastWeight;
207 |           for (channel = 0; channel < channels; ++channel) {
208 |             output_variable_list[channel] = this.lastOutput[channel];
209 |           }
210 |           alreadyProcessedTail = true;
211 |         }
212 |         while (weight > 0 && actualPosition < bufferLength) {
213 |           amountToNext = 1 + actualPosition - currentPosition;
214 |           if (weight >= amountToNext) {
215 |             for (channel = 0; channel < channels; ++channel) {
216 |               output_variable_list[channel] += buffer[actualPosition++] * amountToNext;
217 |             }
218 |             currentPosition = actualPosition;
219 |             weight -= amountToNext;
220 |           } else {
221 |             for (channel = 0; channel < channels; ++channel) {
222 |               output_variable_list[channel] += buffer[actualPosition + ((channel > 0) ? (" + " + channel) : "")] * weight;
223 |             }
224 |             currentPosition += weight;
225 |             weight = 0;
226 |             break;
227 |           }
228 |         }
229 |             
230 |         if (weight === 0) {
231 |           for (channel = 0; channel < channels; ++channel) {
232 |             outputBuffer[outputOffset++] = output_variable_list[channel] / ratioWeight;
233 |           }
234 |         } else {
235 |           this.lastWeight = weight;
236 |           for (channel = 0; channel < channels; ++channel) {
237 |             this.lastOutput[channel] = output_variable_list[channel];
238 |           }
239 |           this.tailExists = true;
240 |           break;
241 |         }
242 |       } while (actualPosition < bufferLength && outputOffset < outLength);
243 |         return this.bufferSlice(outputOffset);
244 |       };
245 |   };
246 | 
247 |   worker_instance.Resampler = Resampler;
248 | 
249 | }(self));


--------------------------------------------------------------------------------