├── .gitignore
├── README.md
└── speech2text


/.gitignore:
--------------------------------------------------------------------------------
1 | /audio
2 | /deepspeech-bin
3 | /models
4 | /tmp.*
5 | /._audio
6 | *.tflite


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Open source offline speech recognition for Android using Mozilla's DeepSpeech in Termux
 2 | 
 3 | ## Requirements
 4 | - ~3GB of disk space during installation; afterwards only ~2GB
 5 | - [Termux](https://f-droid.org/app/com.termux)
 6 | - [Termux:API](https://f-droid.org/app/com.termux.api)
 7 | 
 8 | ## Installation
 9 | 
10 | - Install the following (open source) apps: Termux, Termux:API
11 | - Open Termux and run
12 |     `pkg i -y git && git clone https://github.com/T-vK/Termux-DeepSpeech.git && cd ./Termux-DeepSpeech && ./speech2text`
13 | 
14 | This will take a while because it needs to download a pre-trained DeepSpeech model and a DeepSpeech release. It will probably also ask for microphone permissions (which are required for obvious reasons).
15 | 
16 | ## Usage
17 | If the installation was successful, you should now be able to use the `speech2text` command.
18 | `speech2text` will listen to your microphone for (by default) 2 seconds and then print the words that were recognized.
19 | 
20 | ## Advanced usage
21 | You could create bash scripts like this:
22 | ``` bash
23 | #!/data/data/com.termux/files/usr/bin/bash
24 | 
25 | WORDS="$(speech2text)"                            # This will listen to the microphone for (by default) 2 seconds and then write what you said to the variable WORDS
26 | 
27 | echo "Recognized: $WORDS"                         # Show what you just said
28 | 
29 | if [[ "$WORDS" =~ "light" ]]; then                # If what you said contained the word "light"
30 |     if [[ $WORDS =~ "on" ]]; then                 # If what you said contained the word "on"
31 |         termux-tts-speak "Turning flashlight on"  # Let a robot voice say "Turning flashlight on"
32 |         termux-torch on                           # Turn the flashlight on
33 |     elif [[ $WORDS =~ "of" ]]; then               # If what you said contained the word "of"
34 |         termux-tts-speak "Turning flashlight off" # Let a robot voice say "Turning flashlight off"
35 |         termux-torch off                          # Turn the flashlight off
36 |     fi
37 | elif [[ "$WORDS" =~ "heating" ]] || [[ "$WORDS" =~ "temperature" ]]; then   # If what you said contained the word "heating" or "temperature"
38 |     # Do whatever here...
39 |     echo "Hello"
40 | else
41 |     termux-tts-speak "You said: $WORDS"           # Let a robot voice repeat what it thought you said...
42 | fi
43 | ```
44 | 
45 | If you install the [Termux:Widget](https://f-droid.org/app/com.termux.widget) app, save the above script under "$HOME/.shortcuts/tasks/" and make it executable, for example like this: `chmod +x "$HOME/.shortcuts/tasks/speech-command"` (speech-command is the name of the script),
46 | you can then create a widget that triggers the script. Alternatively, using the (open source) app [HomeBot](https://f-droid.org/app/com.abast.homebot), you can remap long-pressing the home button, which usually triggers the Google voice assistant, to run your speech-command script.
47 | 
48 | 
49 | ## Warning
50 | 
51 | This is a very new script that has barely been tested. You might also have to install a TTS Engine ([Flite TTS Engine](https://f-droid.org/app/edu.cmu.cs.speech.tts.flite) is a good open source one) because I'm using text-to-speech commands a few times in the `Advanced usage` example.
52 | 


--------------------------------------------------------------------------------
/speech2text:
--------------------------------------------------------------------------------
  1 | #!/data/data/com.termux/files/usr/bin/bash
  2 | DEEPSPEECH_VERSION=0.9.3
  3 | SPEECH_TIME=2
  4 | 
  5 | ARCH="$(uname -m)"
  6 | if [[ "$ARCH" == "aarch64" ]]; then
  7 |     ARCH="arm64"
  8 | elif [[ "$ARCH" == "armv7l" ]]; then
  9 |     ARCH="armv7"
 10 | fi
 11 | 
 12 | SOURCE_DIR="$(pwd)"
 13 | 
 14 | enterScriptDir() {
 15 |     SOURCE="${BASH_SOURCE[0]}"
 16 |     while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
 17 |         DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
 18 |         SOURCE="$(readlink "$SOURCE")"
 19 |         [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located
 20 |     done
 21 |     DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
 22 |     cd "$DIR"
 23 | }
 24 | 
 25 | #Ensure we're in the script's directory
 26 | enterScriptDir
 27 | 
 28 | export LD_LIBRARY_PATH="$(pwd)/deepspeech-bin/"
 29 | 
 30 | #Setup
 31 | command -v speech2text > /dev/null || ln -s "$(pwd)/speech2text" "$HOME/../usr/bin/speech2text" 
 32 | ensure_requirements_installed() {
 33 |     command -v ffmpeg > /dev/null || pkg i -y ffmpeg
 34 |     #command -v mpv > /dev/null || pkg i -y mpv
 35 |     command -v wget > /dev/null || pkg i -y wget
 36 |     command -v termux-microphone-record > /dev/null || pkg i -y termux-api
 37 | }
 38 | download_deepspeech() {
 39 |     rm -rf ./deepspeech-bin
 40 |     mkdir ./deepspeech-bin
 41 |     cd ./deepspeech-bin
 42 |     wget "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/native_client.${ARCH}.cpu.android.tar.xz" -O ./native_client.tar.xz -q --show-progress
 43 |     tar xf ./native_client.tar.xz
 44 |     rm -f ./native_client.tar.xz
 45 |     cd ..
 46 | }
 47 | 
 48 | download_example_audio() {
 49 |     rm -rf ./audio
 50 |     wget "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/audio-${DEEPSPEECH_VERSION}.tar.gz" -O ./example-audio.tar.gz -q --show-progress
 51 |     tar xf ./example-audio.tar.gz
 52 |     rm -f ./example-audio.tar.gz
 53 | }
 54 | 
 55 | download_models() {
 56 |     rm -rf ./models
 57 |     wget "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/deepspeech-${DEEPSPEECH_VERSION}-models.tflite" -O "./deepspeech-${DEEPSPEECH_VERSION}-models.tflite" -q --show-progress && \
 58 |     wget "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/deepspeech-${DEEPSPEECH_VERSION}-models.scorer" -O "./deepspeech-${DEEPSPEECH_VERSION}-models.scorer" -q --show-progress
 59 |     if [[ "$?" != 0 ]]; then
 60 |         wget "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/deepspeech-${DEEPSPEECH_VERSION}-models.tar.gz" -O ./models.tar.gz -q --show-progress
 61 |         echo "Extracting ./models.tar.gz ..."
 62 |         tar xf ./models.tar.gz
 63 |         rm -f ./models.tar.gz
 64 |         mv ./deepspeech-*-models ./models
 65 |         rm -f ./._*
 66 |         rm -f ./models/._*
 67 |     fi
 68 | }
 69 | ensure_requirements_installed
 70 | #if [ ! -f ./deepspeech-bin/deepspeech ]; then
 71 | #    download_deepspeech
 72 | #fi
 73 | 
 74 | DETECTED_DEEPSPEECH_VERSION=$(LD_LIBRARY_PATH="$LD_LIBRARY_PATH" ./deepspeech-bin/deepspeech --version 2>&1 | grep DeepSpeech | cut -d "v" -f2 | cut -d "-" -f1 | sed -e 's/DeepSpeech //')
 75 | 
 76 | if [ "$DEEPSPEECH_VERSION" != "$DETECTED_DEEPSPEECH_VERSION" ]; then
 77 |     download_deepspeech
 78 |     download_example_audio
 79 |     download_models
 80 | fi
 81 | 
 82 | ls ./audio/*.wav &> /dev/null || download_example_audio
 83 | 
 84 | if [ ! -f ./models/output_graph.tflite ] && [ ! -f "./deepspeech-${DEEPSPEECH_VERSION}-models.tflite" ]; then
 85 |     download_models
 86 | fi
 87 | 
 88 | # Important part of the script
 89 | 
 90 | mic_listen() {
 91 |     rm -f ./tmp.mp4
 92 |     rm -f ./tmp.wav
 93 |     OUTPUT="$(termux-microphone-record -f ./tmp.mp4 -l $SPEECH_TIME -r 16000 -c 1 -e amr_nb)"
 94 |     if [[ "$OUTPUT" =~ "Recording error: null" ]]; then
 95 |         termux-notification --sound -t "Speech Recognition Error" -c "Can't access the mic. Make sure not other app is using the mic."
 96 |         exit
 97 |     fi
 98 | }
 99 | recognize() {
100 |     termux-microphone-record -q > /dev/null
101 |     ffmpeg -loglevel panic -i ./tmp.mp4 -f wav -bitexact -acodec pcm_s16le -ar 16000 -ac 1 ./tmp.wav
102 |     rm -f ./tmp.mp4
103 |     termux-media-player play ./tmp.wav > /dev/null & #play recording back before deepspeech
104 | 
105 |     if [[ "$DEEPSPEECH_VERSION" == "0.5.1" ]]; then
106 |         ./deepspeech-bin/deepspeech --model ./models/output_graph.tflite --alphabet ./models/alphabet.txt --lm ./models/lm.binary --trie ./models/trie --audio ./tmp.wav 2>&1 | tail -1
107 |     elif [ -f "./deepspeech-${DEEPSPEECH_VERSION}-models.tflite" ] ; then
108 |         ./deepspeech-bin/deepspeech --model "./deepspeech-${DEEPSPEECH_VERSION}-models.tflite" --scorer "./deepspeech-${DEEPSPEECH_VERSION}-models.scorer" --audio ./tmp.wav 2>&1 | tail -1
109 |     elif [ -f ./models/output_graph.tflite ] ; then
110 |         ./deepspeech-bin/deepspeech --model ./models/output_graph.tflite --lm ./models/lm.binary --trie ./models/trie --audio ./tmp.wav 2>&1 | tail -1
111 |     else
112 |         echo "Error: Couldn't find the deepspeech models!"
113 |     fi
114 |     rm -f ./tmp.wav
115 | }
116 | 
117 | 
# Entry point: start the recording, wait for the recording time to pass, then
# convert the recording and print what was recognized.
main() {
    mic_listen
    sleep $SPEECH_TIME
    recognize
}
main

# Go back to the directory the script was started from.
cd "$SOURCE_DIR"
123 | 


--------------------------------------------------------------------------------