├── .gitignore ├── README.md └── speech2text /.gitignore: -------------------------------------------------------------------------------- 1 | /audio 2 | /deepspeech-bin 3 | /models 4 | /tmp.* 5 | /._audio 6 | *.tflite -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Open source offline speech recognition for Android using Mozilla's DeepSpeech in Termux 2 | 3 | ## Requirements 4 | - ~3GB of disk space during installation; afterwards only ~2GB 5 | - [Termux](https://f-droid.org/app/com.termux) 6 | - [Termux:API](https://f-droid.org/app/com.termux.api) 7 | 8 | ## Installation 9 | 10 | - Install the following (open source) apps: Termux, Termux:API 11 | - Open Termux and run 12 | `pkg i -y git && git clone https://github.com/T-vK/Termux-DeepSpeech.git && cd ./Termux-DeepSpeech && ./speech2text` 13 | 14 | This will take a while because it needs to download a pre-trained DeepSpeech model and a DeepSpeech release. It will probably also ask for microphone permissions (which are required for obvious reasons). 15 | 16 | ## Usage 17 | If the installation was successful, you should now be able to use the command `speech2text`. 18 | `speech2text` will listen to your microphone for (by default) 2 seconds and then print the words that were recognized. 
19 | 20 | ## Advanced usage 21 | You could create bash scripts like this: 22 | ``` bash 23 | #!/data/data/com.termux/files/usr/bin/bash 24 | 25 | WORDS="$(speech2text)" # This will listen to the microphone for (by default) 2 seconds and then write what you said to the variable WORDS 26 | 27 | echo "Recognized: $WORDS" # Show what you just said 28 | 29 | if [[ "$WORDS" =~ "light" ]]; then # If what you said contained the word "light" 30 | if [[ $WORDS =~ "on" ]]; then # If what you said contained the word "on" 31 | termux-tts-speak "Turning flashlight on" # Let a robot voice say "Turning flashlight on" 32 | termux-torch on # Turn the flashlight on 33 | elif [[ $WORDS =~ "of" ]]; then # If what you said contained the word "of" 34 | termux-tts-speak "Turning flashlight off" # Let a robot voice say "Turning flashlight off" 35 | termux-torch off # Turn the flashlight off 36 | fi 37 | elif [[ "$WORDS" =~ "heating" ]] || [[ "$WORDS" =~ "temperature" ]]; then # If what you said contained the word "heating" or "temperature" 38 | # Do whatever here... 39 | echo "Hello" 40 | else 41 | termux-tts-speak "You said: $WORDS" # Let a robot voice repeat what it thought you said... 42 | fi 43 | ``` 44 | 45 | If you install the [Termux:Widget](https://f-droid.org/app/com.termux.widget) app and save the above script under "$HOME/.shortcuts/tasks/" and make it executable for example like this: `chmod +x "$HOME/.shortcuts/tasks/speech-command"` (speech-command is the name of the script), 46 | you can then create a widget that triggers the script. Or using the app [HomeBot](https://f-droid.org/app/com.abast.homebot) (open source) you can remap long-pressing the home button which usually triggers the Google voice assistant to run your speech-command script. 47 | 48 | 49 | ## Warning 50 | 51 | This is a very new script that has barely been tested. 
# (README.md, final line, carried over from the dump:) You might also have to install a TTS Engine ([Flite TTS Engine](https://f-droid.org/app/edu.cmu.cs.speech.tts.flite) is a good open source one) because I'm using text-to-speech commands a few times in the `Advanced usage` example.
# --------------------------------------------------------------------------------
# /speech2text:
# --------------------------------------------------------------------------------
#!/data/data/com.termux/files/usr/bin/bash
#
# speech2text - record SPEECH_TIME seconds from the microphone via Termux:API
# and print the text recognized by Mozilla DeepSpeech on stdout.
#
# On each run the script makes sure the required packages, the DeepSpeech
# native client, the example audio and the models are present next to the
# script, downloading whatever is missing.

DEEPSPEECH_VERSION=0.9.3
SPEECH_TIME=2

# Map the kernel architecture name to the one used in the DeepSpeech release
# file names; any other value is passed through unchanged.
ARCH="$(uname -m)"
if [[ "$ARCH" == "aarch64" ]]; then
  ARCH="arm64"
elif [[ "$ARCH" == "armv7l" ]]; then
  ARCH="armv7"
fi

# Remember where we were called from; restored at the very end.
SOURCE_DIR="$(pwd)"

#######################################
# Change into the directory that contains this script, following symlinks.
# Exits the script with status 1 if that directory cannot be determined or
# entered: everything below uses relative paths (including `rm -rf`), so
# continuing from the wrong directory would be destructive.
#######################################
enterScriptDir() {
  local script_path="${BASH_SOURCE[0]}"
  local script_dir
  while [[ -h "$script_path" ]]; do # resolve until the file is no longer a symlink
    script_dir="$(cd -P "$(dirname "$script_path")" > /dev/null 2>&1 && pwd)"
    script_path="$(readlink "$script_path")"
    # A relative symlink target is relative to the directory of the link.
    [[ "$script_path" != /* ]] && script_path="$script_dir/$script_path"
  done
  script_dir="$(cd -P "$(dirname "$script_path")" > /dev/null 2>&1 && pwd)"
  # An empty script_dir must be rejected explicitly: `cd ""` would not fail.
  if [[ -z "$script_dir" ]] || ! cd "$script_dir"; then
    echo "speech2text: cannot enter the script directory" >&2
    exit 1
  fi
}

#Ensure we're in the script's directory
enterScriptDir

export LD_LIBRARY_PATH="$(pwd)/deepspeech-bin/"

#Setup
# Make the script callable as `speech2text` if it is not on PATH yet.
command -v speech2text > /dev/null || ln -s "$(pwd)/speech2text" "$HOME/../usr/bin/speech2text"

#######################################
# Install every missing command-line dependency with `pkg`.
#######################################
ensure_requirements_installed() {
  command -v ffmpeg > /dev/null || pkg i -y ffmpeg
  #command -v mpv > /dev/null || pkg i -y mpv
  command -v wget > /dev/null || pkg i -y wget
  command -v termux-microphone-record > /dev/null || pkg i -y termux-api
}

#######################################
# Download and unpack the DeepSpeech native client into ./deepspeech-bin.
# Globals: DEEPSPEECH_VERSION, ARCH (read)
# Returns: 0 on success, 1 if the directory, download or extraction failed.
#######################################
download_deepspeech() {
  local archive=./deepspeech-bin/native_client.tar.xz
  local status=0
  rm -rf ./deepspeech-bin
  mkdir ./deepspeech-bin || return 1
  # Explicit paths (tar -C) instead of cd-ing in and out: the working
  # directory never changes, so a failure here cannot misdirect the relative
  # `rm -rf` calls made by the other download functions.
  wget "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/native_client.${ARCH}.cpu.android.tar.xz" -O "$archive" -q --show-progress \
    && tar xf "$archive" -C ./deepspeech-bin \
    || status=1
  rm -f "$archive"
  return "$status"
}

#######################################
# Download and unpack the example audio archive of the release.
# Globals: DEEPSPEECH_VERSION (read)
# Returns: 0 on success, 1 if the download or extraction failed.
#######################################
download_example_audio() {
  local status=0
  rm -rf ./audio
  wget "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/audio-${DEEPSPEECH_VERSION}.tar.gz" -O ./example-audio.tar.gz -q --show-progress \
    && tar xf ./example-audio.tar.gz \
    || status=1
  rm -f ./example-audio.tar.gz
  return "$status"
}

#######################################
# Download the models: first the separate .tflite/.scorer files, and if
# either of them cannot be fetched, the models tar.gz, which is unpacked
# into ./models.
# Globals: DEEPSPEECH_VERSION (read)
# Returns: 0 on success, 1 if nothing usable could be downloaded.
#######################################
download_models() {
  local release_url="https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}"
  local tflite="./deepspeech-${DEEPSPEECH_VERSION}-models.tflite"
  local scorer="./deepspeech-${DEEPSPEECH_VERSION}-models.scorer"

  rm -rf ./models
  if wget "${release_url}/deepspeech-${DEEPSPEECH_VERSION}-models.tflite" -O "$tflite" -q --show-progress \
    && wget "${release_url}/deepspeech-${DEEPSPEECH_VERSION}-models.scorer" -O "$scorer" -q --show-progress; then
    return 0
  fi

  # wget -O creates/truncates its target before any data arrives, so a
  # failed download leaves an empty or partial file behind. The rest of the
  # script only tests for the file's existence, so remove the leftovers or
  # they would be mistaken for a valid model on this and every later run.
  rm -f "$tflite" "$scorer"

  if ! wget "${release_url}/deepspeech-${DEEPSPEECH_VERSION}-models.tar.gz" -O ./models.tar.gz -q --show-progress; then
    rm -f ./models.tar.gz
    echo "speech2text: could not download the DeepSpeech models" >&2
    return 1
  fi
  # Progress goes to stderr: stdout of this script is the recognized text.
  echo "Extracting ./models.tar.gz ..." >&2
  tar xf ./models.tar.gz
  rm -f ./models.tar.gz
  mv ./deepspeech-*-models ./models
  # Drop the "._*" files that come out of the archive.
  rm -f ./._*
  rm -f ./models/._*
}

ensure_requirements_installed
#if [ ! -f ./deepspeech-bin/deepspeech ]; then
#    download_deepspeech
#fi

# Extract the version number from the `--version` output of the installed
# binary. The result is empty when the binary is missing or prints no line
# containing "DeepSpeech", which also triggers the (re)download below.
DETECTED_DEEPSPEECH_VERSION=$(LD_LIBRARY_PATH="$LD_LIBRARY_PATH" ./deepspeech-bin/deepspeech --version 2>&1 | grep DeepSpeech | cut -d "v" -f2 | cut -d "-" -f1 | sed -e 's/DeepSpeech //')

if [[ "$DEEPSPEECH_VERSION" != "$DETECTED_DEEPSPEECH_VERSION" ]]; then
  download_deepspeech
  download_example_audio
  download_models
fi

ls ./audio/*.wav &> /dev/null || download_example_audio

if [[ ! -f ./models/output_graph.tflite && ! -f "./deepspeech-${DEEPSPEECH_VERSION}-models.tflite" ]]; then
  download_models
fi

# Important part of the script

#######################################
# Start a microphone recording into ./tmp.mp4 with a limit of SPEECH_TIME
# seconds. If the recorder reports "Recording error: null", a notification
# is shown and the script exits with status 1.
# Globals: SPEECH_TIME (read)
#######################################
mic_listen() {
  local recorder_output
  rm -f ./tmp.mp4
  rm -f ./tmp.wav
  recorder_output="$(termux-microphone-record -f ./tmp.mp4 -l "$SPEECH_TIME" -r 16000 -c 1 -e amr_nb)"
  if [[ "$recorder_output" =~ "Recording error: null" ]]; then
    termux-notification --sound -t "Speech Recognition Error" -c "Can't access the mic. Make sure not other app is using the mic."
    # Explicit non-zero status so callers can tell that nothing was recorded.
    exit 1
  fi
}

#######################################
# Stop the recording, convert it to 16 kHz mono 16-bit PCM WAV, play it back
# in the background and print the last line of the DeepSpeech output.
# Globals: DEEPSPEECH_VERSION (read)
# Outputs: recognized text (or the "models not found" message) on stdout
#######################################
recognize() {
  termux-microphone-record -q > /dev/null
  ffmpeg -loglevel panic -i ./tmp.mp4 -f wav -bitexact -acodec pcm_s16le -ar 16000 -ac 1 ./tmp.wav
  rm -f ./tmp.mp4
  termux-media-player play ./tmp.wav > /dev/null & #play recording back before deepspeech

  # The command-line flags depend on the release and on which model files
  # are present.
  if [[ "$DEEPSPEECH_VERSION" == "0.5.1" ]]; then
    ./deepspeech-bin/deepspeech --model ./models/output_graph.tflite --alphabet ./models/alphabet.txt --lm ./models/lm.binary --trie ./models/trie --audio ./tmp.wav 2>&1 | tail -1
  elif [[ -f "./deepspeech-${DEEPSPEECH_VERSION}-models.tflite" ]]; then
    ./deepspeech-bin/deepspeech --model "./deepspeech-${DEEPSPEECH_VERSION}-models.tflite" --scorer "./deepspeech-${DEEPSPEECH_VERSION}-models.scorer" --audio ./tmp.wav 2>&1 | tail -1
  elif [[ -f ./models/output_graph.tflite ]]; then
    ./deepspeech-bin/deepspeech --model ./models/output_graph.tflite --lm ./models/lm.binary --trie ./models/trie --audio ./tmp.wav 2>&1 | tail -1
  else
    # NOTE(review): printed on stdout, so callers that capture the output
    # receive this text in place of a transcript - confirm this is wanted.
    echo "Error: Couldn't find the deepspeech models!"
  fi
  rm -f ./tmp.wav
}


mic_listen
# Let the recording run for its full length before stopping it.
sleep "$SPEECH_TIME"
recognize

cd "$SOURCE_DIR"