├── Dockerfile ├── LICENSE ├── README.md ├── audio ├── 1272-128104-0000.wav └── README.md ├── examples └── practical-example │ ├── Dockerfile │ ├── README.md │ └── supervisord.conf ├── img ├── 1.png ├── 2.png ├── 3.png ├── 4.png ├── 5.png └── 6.png ├── start.sh └── stop.sh /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:9 2 | MAINTAINER Eduardo Silva 3 | 4 | RUN apt-get update && apt-get install -y \ 5 | procps \ 6 | autoconf \ 7 | automake \ 8 | bzip2 \ 9 | g++ \ 10 | git \ 11 | gstreamer1.0-plugins-good \ 12 | gstreamer1.0-tools \ 13 | gstreamer1.0-pulseaudio \ 14 | gstreamer1.0-plugins-bad \ 15 | gstreamer1.0-plugins-base \ 16 | gstreamer1.0-plugins-ugly \ 17 | libatlas3-base \ 18 | libgstreamer1.0-dev \ 19 | libtool-bin \ 20 | make \ 21 | python2.7 \ 22 | python3 \ 23 | python-pip \ 24 | python-yaml \ 25 | python-simplejson \ 26 | python-gi \ 27 | subversion \ 28 | unzip \ 29 | wget \ 30 | build-essential \ 31 | python-dev \ 32 | sox \ 33 | zlib1g-dev && \ 34 | apt-get clean autoclean && \ 35 | apt-get autoremove -y && \ 36 | pip install ws4py==0.3.2 && \ 37 | pip install tornado && \ 38 | ln -s /usr/bin/python2.7 /usr/bin/python ; ln -s -f bash /bin/sh 39 | 40 | WORKDIR /opt 41 | 42 | RUN wget http://www.digip.org/jansson/releases/jansson-2.7.tar.bz2 && \ 43 | bunzip2 -c jansson-2.7.tar.bz2 | tar xf - && \ 44 | cd jansson-2.7 && \ 45 | ./configure && make -j $(nproc) && make check && make install && \ 46 | echo "/usr/local/lib" >> /etc/ld.so.conf.d/jansson.conf && ldconfig && \ 47 | rm /opt/jansson-2.7.tar.bz2 && rm -rf /opt/jansson-2.7 48 | 49 | RUN git clone https://github.com/kaldi-asr/kaldi && \ 50 | cd /opt/kaldi/tools && \ 51 | make -j $(nproc) && \ 52 | ./install_portaudio.sh && \ 53 | /opt/kaldi/tools/extras/install_mkl.sh && \ 54 | cd /opt/kaldi/src && ./configure --shared && \ 55 | sed -i '/-g # -O0 -DKALDI_PARANOID/c\-O3 -DNDEBUG' kaldi.mk && \ 56 | make clean -j $(nproc) && make -j 
$(nproc) depend && make -j $(nproc) && \ 57 | cd /opt/kaldi/src/online && make depend -j $(nproc) && make -j $(nproc) && \ 58 | cd /opt/kaldi/src/gst-plugin && sed -i 's/-lmkl_p4n//g' Makefile && make depend -j $(nproc) && make -j $(nproc) && \ 59 | cd /opt && \ 60 | git clone https://github.com/alumae/gst-kaldi-nnet2-online.git && \ 61 | cd /opt/gst-kaldi-nnet2-online/src && \ 62 | sed -i '/KALDI_ROOT?=\/home\/tanel\/tools\/kaldi-trunk/c\KALDI_ROOT?=\/opt\/kaldi' Makefile && \ 63 | make depend -j $(nproc) && make -j $(nproc) && \ 64 | rm -rf /opt/gst-kaldi-nnet2-online/.git/ && \ 65 | find /opt/gst-kaldi-nnet2-online/src/ -type f -not -name '*.so' -delete && \ 66 | rm -rf /opt/kaldi/.git && \ 67 | rm -rf /opt/kaldi/egs/ /opt/kaldi/windows/ /opt/kaldi/misc/ && \ 68 | find /opt/kaldi/src/ -type f -not -name '*.so' -delete && \ 69 | find /opt/kaldi/tools/ -type f \( -not -name '*.so' -and -not -name '*.so*' \) -delete && \ 70 | cd /opt && git clone https://github.com/alumae/kaldi-gstreamer-server.git && \ 71 | rm -rf /opt/kaldi-gstreamer-server/.git/ && \ 72 | rm -rf /opt/kaldi-gstreamer-server/test/ 73 | 74 | COPY start.sh stop.sh /opt/ 75 | 76 | RUN chmod +x /opt/start.sh && \ 77 | chmod +x /opt/stop.sh 78 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017, Eduardo Silva 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # docker-kaldi-gstreamer-server 2 | Dockerfile for [kaldi-gstreamer-server](https://github.com/alumae/kaldi-gstreamer-server). 3 | 4 | Synopsis 5 | -------- 6 | 7 | This dockerfile automatically builds master and worker servers that are explained at [Full-duplex Speech-to-text System for Estonian](http://ebooks.iospress.nl/volumearticle/37996) from Tanel Alumäe and implemented by himself at https://github.com/alumae/kaldi-gstreamer-server. 8 | 9 | Using this project you will be able to run an automatic speech recognition (ASR) server in a few minutes. 10 | 11 | Attention 12 | --------- 13 | 14 | The ASR server that will be set up here requires some [kaldi models](http://www.kaldi.org). In the docker image I will detail below, there are no kaldi models included. 
15 | 16 | You must have these models on your machine. You must also have an yaml file describing these models. Please, check some examples [here](https://github.com/alumae/kaldi-gstreamer-server/blob/master/sample_worker.yaml), [here](https://github.com/alumae/kaldi-gstreamer-server/blob/master/estonian_worker.yaml) and [here](https://github.com/alumae/kaldi-gstreamer-server/blob/master/sample_english_nnet2.yaml) to find out how to write your own yaml files. 17 | 18 | There are some kaldi models available for download. I have tested my setup with this [one](https://phon.ioc.ee/~tanela/tedlium_nnet_ms_sp_online.tgz), which is for English. I'm trying to build a model for Brazilian Portuguese, but until now I didn't find enough free/open resources. 19 | 20 | 21 | Install docker 22 | -------------- 23 | 24 | Please, refer to https://docs.docker.com/engine/installation/. 25 | 26 | 27 | Get the image 28 | ------------- 29 | 30 | * Pull the image from Docker Hub (~ 900MB): 31 | 32 | `docker pull jcsilva/docker-kaldi-gstreamer-server` 33 | 34 | * Or you may build your own image (requires git): 35 | 36 | `docker build -t kaldi-gstreamer-server:1.0 https://github.com/jcsilva/docker-kaldi-gstreamer-server.git` 37 | 38 | In the next sections I'll assume you pulled the image from Docker Hub. If you have built your own image, simply change *jcsilva/docker-kaldi-gstreamer-server:latest* by your image name when appropriate. 39 | 40 | 41 | How to use 42 | ---------- 43 | 44 | It's possible to use the same docker in two scenarios. You may create the master and worker on the same host machine. Or you can create just a worker and connect it to an already existing master. These two situations are explained below. 
45 | 46 | * Instantiate master server and worker server on the same machine: 47 | 48 | Assuming that your kaldi models are located at /media/kaldi_models on your host machine, create a container: 49 | 50 | ``` 51 | docker run -it -p 8080:80 -v /media/kaldi_models:/opt/models jcsilva/docker-kaldi-gstreamer-server:latest /bin/bash 52 | ``` 53 | 54 | And, inside the container, start the service: 55 | 56 | ``` 57 | /opt/start.sh -y /opt/models/nnet2.yaml 58 | ``` 59 | 60 | You will see that 2 .log files (worker.log and master.log) will be created at /opt of your containter. If everything goes ok, you will see some lines indicating that there is a worker available. In this case, you can go back to your host machine (`Ctrl+P and Ctrl+Q` on the container). Your ASR service will be listening on port 8080. 61 | 62 | For stopping the servers, you may execute the following command inside your container: 63 | ``` 64 | /opt/stop.sh 65 | ``` 66 | 67 | * Instantiate a worker server and connect it to a remote master: 68 | 69 | Assuming that your kaldi models are located at /media/kaldi_models on your host machine, create a container: 70 | 71 | ``` 72 | docker run -it -v /media/kaldi_models:/opt/models jcsilva/docker-kaldi-gstreamer-server:latest /bin/bash 73 | ``` 74 | 75 | And, inside the container, start the service: 76 | 77 | ``` 78 | /opt/start.sh -y /opt/models/nnet2.yaml -m master-server.com -p 8888 79 | ``` 80 | 81 | It instantiates a worker on your local host and connects it to a master server located at master-server.com:8888. 82 | 83 | You will see that a worker.log file will be created at /opt of your container. If everything goes ok, you will see some lines indicating that there is a worker available. 84 | 85 | For stopping the worker server, you may execute the following command inside your container: 86 | ``` 87 | /opt/stop.sh 88 | ``` 89 | 90 | Testing 91 | ------- 92 | 93 | First of all, please, check if your setup is ok. 
It can be done using your browser following these steps: 94 | 95 | 1. Open a websocket client in your browser (e.g: [Simple-WebSocket-Client](https://github.com/hakobera/Simple-WebSocket-Client) or http://www.websocket.org/echo.html). 96 | 97 | 2. Connect to your master server: `ws://MASTER_SERVER/client/ws/status`. If your master is on local host port 8080, you can try: `ws://localhost:8080/client/ws/status`. 98 | 99 | 3. If your setup is ok, the answer is going to be something like: `RESPONSE: {"num_workers_available": 1, "num_requests_processed": 0}`. 100 | 101 | After checking the setup, you should test your speech recognition service. For this, there are several options, and the following list gives some ideas: 102 | 103 | 1. You can download [this client](https://github.com/alumae/kaldi-gstreamer-server/blob/master/kaldigstserver/client.py) for your host machine and execute it. When the master is on the local host, port 8080 and you have a wav file sampled at 16 kHz located at /home/localhost/audio/, you can type: ```python client.py -u ws://localhost:8080/client/ws/speech -r 32000 /home/localhost/audio/sample16k.wav``` 104 | 105 | 2. You can use [Kõnele](http://kaljurand.github.io/K6nele/) for testing the service. It is an Android app that is freely available for downloading at Google Play. You must configure it to use your ASR service. Below you'll find some screenshots that may help you in this configuration. First, you should click on **Kõnele (fast recognition)**. Then, change the **WebSocket URL**. In my case, I connected to a master server located at ws://192.168.1.10:8080/client/ws/speech. After that, open a **notepad-like** application and change your input method to **Kõnele speech keyboard** and you'll see a **yellow button** instead of your traditional keyboard. Press this button and enjoy! 
106 | 107 | 108 | Kõnele configuration 109 |   110 | Kõnele configuration 111 |   112 | Kõnele configuration 113 |   114 | Kõnele configuration 115 |   116 | Kõnele configuration 117 |   118 | Kõnele configuration 119 | 120 | 121 | 3. A Javascript client is available at http://kaljurand.github.io/dictate.js/. You must configure it to use your ASR service. 122 | 123 | 124 | Practical Example 125 | ----------------- 126 | 127 | This section describes a tested example. You may repeat all the steps and, in the end, you'll have an english ASR system working on your machine. For this example, I advise you to use a machine with at least 4GB RAM. 128 | 129 | On the host machine, we are going to work on the directory /media/kaldi_models. I'll assume you have all permissions necessary to execute the following commands. 130 | 131 | 1) Download a valid kaldi model: 132 | ``` 133 | cd /media/kaldi_models 134 | wget https://phon.ioc.ee/~tanela/tedlium_nnet_ms_sp_online.tgz 135 | tar -zxvf tedlium_nnet_ms_sp_online.tgz 136 | ``` 137 | 138 | 2) Copy an example yaml file to /media/kaldi_models: 139 | ``` 140 | wget https://raw.githubusercontent.com/alumae/kaldi-gstreamer-server/master/sample_english_nnet2.yaml -P /media/kaldi_models 141 | ``` 142 | 143 | 3) Update file contents: 144 | ``` 145 | find /media/kaldi_models/ -type f | xargs sed -i 's:test:/opt:g' 146 | sed -i 's:full-post-processor:#full-post-processor:g' /media/kaldi_models/sample_english_nnet2.yaml 147 | ``` 148 | 149 | 4) Instantiate master and worker on the same machine: 150 | ``` 151 | docker run -it -p 8080:80 -v /media/kaldi_models:/opt/models jcsilva/docker-kaldi-gstreamer-server:latest /bin/bash 152 | ``` 153 | 154 | 5) Inside the docker container, start the service: 155 | ``` 156 | /opt/start.sh -y /opt/models/sample_english_nnet2.yaml 157 | ``` 158 | 159 | 6) On your host machine, download a client example and test your setup with a given audio: 160 | ``` 161 | wget 
https://raw.githubusercontent.com/alumae/kaldi-gstreamer-server/master/kaldigstserver/client.py -P /tmp 162 | wget https://raw.githubusercontent.com/jcsilva/docker-kaldi-gstreamer-server/master/audio/1272-128104-0000.wav -P /tmp 163 | wget https://raw.githubusercontent.com/alumae/kaldi-gstreamer-server/master/test/data/bill_gates-TED.mp3 -P /tmp 164 | python /tmp/client.py -u ws://localhost:8080/client/ws/speech -r 32000 /tmp/1272-128104-0000.wav 165 | python /tmp/client.py -u ws://localhost:8080/client/ws/speech -r 8192 /tmp/bill_gates-TED.mp3 166 | ``` 167 | 168 | OBS: For running the client example, you must install ws4py version 0.3.2. This can be installed using `pip install --user ws4py==0.3.2`. You may also need simplejson and pyaudio. They may also be installed using pip. 169 | 170 | You should get these transcriptions: 171 | 172 | * Audio bill_gates-TED.mp3: 173 | 174 | and i was a kid the disaster we worry about most was a nuclear war. that's why we had a barrel like this down our basement filled with cans of food and water. when the nuclear attack came we were supposed to go downstairs hunker down and eat out of that barrel. today the grea/opt risk of global catastrophe. doesn't look like this instead it looks like this. if anything kills over ten million people in the next few decades it's most likely to be a highly infectious virus rather than a war. not missiles that microbes now part of the reason for this is that we have invested a huge amount in nuclear deterrence we've actually invested very little in a system to stop an epidemic. we're not ready for the next epidemic. 175 | 176 | * Audio 1272-128104-0000.wav: 177 | 178 | mr coulter is the apostle of the middle classes and we're glad to welcome his gospel. 
179 | 180 | Credits 181 | -------- 182 | * [kaldi](http://www.kaldi.org) 183 | * [gst-kaldi-nnet2-online](https://github.com/alumae/gst-kaldi-nnet2-online) 184 | * [kaldi-gstreamer-server](https://github.com/alumae/kaldi-gstreamer-server) 185 | * [Kõnele](http://kaljurand.github.io/K6nele/) 186 | -------------------------------------------------------------------------------- /audio/1272-128104-0000.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcsilva/docker-kaldi-gstreamer-server/991462f2ffa4c40641b592176e1fdf2047ebe957/audio/1272-128104-0000.wav -------------------------------------------------------------------------------- /audio/README.md: -------------------------------------------------------------------------------- 1 | Audio from dev-clean Librispeech ASR Corpus (http://www.openslr.org/resources/12/dev-clean.tar.gz) 2 | -------------------------------------------------------------------------------- /examples/practical-example/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jcsilva/docker-kaldi-gstreamer-server 2 | MAINTAINER Eduardo Silva 3 | 4 | # This is the practical example part 5 | 6 | RUN mkdir -p /opt/models && cd /opt/models && \ 7 | wget --no-check-certificate https://phon.ioc.ee/~tanela/tedlium_nnet_ms_sp_online.tgz && \ 8 | tar -zxvf tedlium_nnet_ms_sp_online.tgz && \ 9 | wget https://raw.githubusercontent.com/alumae/kaldi-gstreamer-server/master/sample_english_nnet2.yaml -P /opt/models && \ 10 | find /opt/models/ -type f | xargs sed -i 's:test:/opt:g' && \ 11 | sed -i 's:full-post-processor:#full-post-processor:g' /opt/models/sample_english_nnet2.yaml 12 | 13 | RUN apt-get update && \ 14 | apt-get install -y supervisor && \ 15 | apt-get clean autoclean && \ 16 | apt-get autoremove -y && \ 17 | rm -rf /var/lib/apt/lists/* 18 | 19 | COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf 20 | 21 | EXPOSE 80 22 | 
 23 | CMD ["/usr/bin/supervisord"] 24 | -------------------------------------------------------------------------------- /examples/practical-example/README.md: -------------------------------------------------------------------------------- 1 | # Working example with single docker file 2 | This docker file automates the process described in the practical end to end example. 3 | It is purely meant for creating a basic test system very quickly. 4 | It creates a single Docker Image and starts the Kaldi master and a single worker. It also downloads a model. 5 | Due to the size of the model it may take a while to build this docker image. 6 | 7 | # Steps to run 8 | ## Build the docker image 9 | ``` 10 | docker build -t docker-kaldi-gstreamer-example:latest . 11 | ``` 12 | 13 | ## Run the docker image: 14 | ``` 15 | docker run -itd -p 8080:80 --shm-size=256m docker-kaldi-gstreamer-example:latest 16 | 17 | ``` 18 | 19 | ## Test the install 20 | On your host machine, download a client example and test your setup with a given audio: 21 | ``` 22 | wget https://raw.githubusercontent.com/alumae/kaldi-gstreamer-server/master/kaldigstserver/client.py -P /tmp 23 | wget https://raw.githubusercontent.com/alumae/kaldi-gstreamer-server/master/test/data/bill_gates-TED.mp3 -P /tmp 24 | python /tmp/client.py -u ws://localhost:8080/client/ws/speech -r 8192 /tmp/bill_gates-TED.mp3 25 | ``` 26 | -------------------------------------------------------------------------------- /examples/practical-example/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | 4 | [program:master] 5 | command=python /opt/kaldi-gstreamer-server/kaldigstserver/master_server.py --port=80 6 | numprocs=1 7 | autostart=true 8 | autorestart=true 9 | stderr_logfile=/opt/master.log 10 | 11 | 12 | [program:worker] 13 | environment=GST_PLUGIN_PATH=/opt/gst-kaldi-nnet2-online/src/:/opt/kaldi/src/gst-plugin/ 14 | command=python 
/opt/kaldi-gstreamer-server/kaldigstserver/worker.py -c /opt/models/sample_english_nnet2.yaml -u ws://localhost:80/worker/ws/speech 15 | numprocs=1 16 | autostart=true 17 | autorestart=true 18 | stderr_logfile=/opt/worker.log 19 | -------------------------------------------------------------------------------- /img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcsilva/docker-kaldi-gstreamer-server/991462f2ffa4c40641b592176e1fdf2047ebe957/img/1.png -------------------------------------------------------------------------------- /img/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcsilva/docker-kaldi-gstreamer-server/991462f2ffa4c40641b592176e1fdf2047ebe957/img/2.png -------------------------------------------------------------------------------- /img/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcsilva/docker-kaldi-gstreamer-server/991462f2ffa4c40641b592176e1fdf2047ebe957/img/3.png -------------------------------------------------------------------------------- /img/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcsilva/docker-kaldi-gstreamer-server/991462f2ffa4c40641b592176e1fdf2047ebe957/img/4.png -------------------------------------------------------------------------------- /img/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcsilva/docker-kaldi-gstreamer-server/991462f2ffa4c40641b592176e1fdf2047ebe957/img/5.png -------------------------------------------------------------------------------- /img/6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jcsilva/docker-kaldi-gstreamer-server/991462f2ffa4c40641b592176e1fdf2047ebe957/img/6.png -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | MASTER="localhost" 4 | PORT=80 5 | 6 | usage(){ 7 | echo "Creates a worker and connects it to a master."; 8 | echo "If the master address is not given, a master will be created at localhost:80"; 9 | echo "Usage: $0 -y yaml_file [-m master address] [-p port number]"; 10 | } 11 | 12 | while getopts "h?m:p:y:" opt; do 13 | case "$opt" in 14 | h|\?) 15 | usage 16 | exit 0 17 | ;; 18 | m) MASTER="$OPTARG" 19 | ;; 20 | p) PORT="$OPTARG" 21 | ;; 22 | y) YAML="$OPTARG" 23 | ;; 24 | esac 25 | done 26 | 27 | # a yaml model-config file must be specified and must exist 28 | if [ -z "$YAML" ] || [ ! -f "$YAML" ] ; then 29 | usage; 30 | exit 1; 31 | fi; 32 | 33 | 34 | if [ "$MASTER" == "localhost" ] ; then 35 | # start a local master 36 | python /opt/kaldi-gstreamer-server/kaldigstserver/master_server.py --port="$PORT" 2>> /opt/master.log & 37 | fi 38 | 39 | # start worker and connect it to the master; expansions are quoted so paths with spaces are safe 40 | export GST_PLUGIN_PATH=/opt/gst-kaldi-nnet2-online/src/:/opt/kaldi/src/gst-plugin/ 41 | 42 | python /opt/kaldi-gstreamer-server/kaldigstserver/worker.py -c "$YAML" -u "ws://$MASTER:$PORT/worker/ws/speech" 2>> /opt/worker.log & 43 | -------------------------------------------------------------------------------- /stop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # terminate worker(s) with SIGTERM; pkill -f (from procps, installed in the image) replaces the fragile ps|grep|awk|sh pipeline 4 | pkill -15 -f worker.py || true 5 | 6 | # terminate master with SIGTERM; '|| true' keeps exit status 0 when nothing was running, matching the old pipeline's behavior 7 | pkill -15 -f master_server.py || true 8 | 9 | --------------------------------------------------------------------------------