├── .github └── workflows │ └── workflow.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── lua_deepspeech.c └── main.lua /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | name: workflow 2 | 3 | on: push 4 | 5 | jobs: 6 | build: 7 | strategy: 8 | matrix: 9 | os: [ubuntu-latest, windows-latest] 10 | include: 11 | - os: ubuntu-latest 12 | deepspeech-os: linux 13 | toolchain: '' 14 | - os: windows-latest 15 | deepspeech-os: win 16 | toolchain: -DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake 17 | 18 | runs-on: ${{matrix.os}} 19 | 20 | env: 21 | BUILD_TYPE: Release 22 | DEEPSPEECH_URL: https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/native_client.amd64.cpu.${{matrix.deepspeech-os}}.tar.xz 23 | 24 | steps: 25 | - uses: actions/checkout@v2 26 | 27 | - name: Install Lua 28 | if: ${{matrix.os == 'ubuntu-latest'}} 29 | run: sudo apt install liblua5.1-dev 30 | 31 | - name: Install Lua 32 | if: ${{matrix.os == 'windows-latest'}} 33 | run: vcpkg install lua 34 | 35 | - name: Create Build Environment 36 | run: cmake -E make_directory ${{github.workspace}}/build ${{github.workspace}}/build/deepspeech 37 | 38 | - name: Download DeepSpeech 39 | shell: bash 40 | working-directory: ${{github.workspace}}/build/deepspeech 41 | run: curl -sL ${DEEPSPEECH_URL} | tar xJ 42 | 43 | - name: Configure CMake 44 | working-directory: ${{github.workspace}}/build 45 | run: cmake .. -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DDEEPSPEECH_PATH=${{github.workspace}}/build/deepspeech ${{matrix.toolchain}} 46 | 47 | - name: Build 48 | working-directory: ${{github.workspace}}/build 49 | run: cmake --build . 
--config ${{env.BUILD_TYPE}} 50 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1.0) 2 | project(lua-deepspeech) 3 | 4 | add_library(lua-deepspeech MODULE lua_deepspeech.c) 5 | set_target_properties(lua-deepspeech PROPERTIES PREFIX "") 6 | 7 | if(DEEPSPEECH_PATH) 8 | add_library(deepspeech SHARED IMPORTED GLOBAL) 9 | target_include_directories(lua-deepspeech PRIVATE "${DEEPSPEECH_PATH}") 10 | target_link_libraries(lua-deepspeech PRIVATE deepspeech) 11 | if(WIN32) 12 | set_target_properties(deepspeech PROPERTIES IMPORTED_IMPLIB "${DEEPSPEECH_PATH}/libdeepspeech.so.if.lib") 13 | set_target_properties(deepspeech PROPERTIES IMPORTED_LOCATION "${DEEPSPEECH_PATH}/libdeepspeech.so") 14 | else() 15 | set_target_properties(deepspeech PROPERTIES IMPORTED_LOCATION "${DEEPSPEECH_PATH}/libdeepspeech.so") 16 | endif() 17 | else() 18 | message(FATAL_ERROR "Deepspeech path not found. 
Set DEEPSPEECH_PATH to the path to the deepspeech native client library.") 19 | endif() 20 | 21 | if(LOVR) 22 | set(LOVR_PLUGIN_TARGETS lua-deepspeech deepspeech) 23 | else() 24 | include(FindLua) 25 | find_package(Lua REQUIRED) 26 | target_include_directories(lua-deepspeech PRIVATE "${LUA_INCLUDE_DIR}") 27 | target_link_libraries(lua-deepspeech PRIVATE "${LUA_LIBRARIES}") 28 | endif() 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021 Bjorn Swenson 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | lua-deepspeech 2 | === 3 | 4 | Lua bindings for [DeepSpeech](https://github.com/Mozilla/DeepSpeech), an open source speech 5 | recognition library. Intended for use with [LÖVR](https://lovr.org) and [LÖVE](https://love2d.org), 6 | but it should work with any Lua program that has audio samples in a table or a lightuserdata. 7 | 8 | Here's a simple example of using it to do speech-to-text on an audio file: 9 | 10 | ```lua 11 | lovr.speech = require 'lua-deepspeech' 12 | 13 | function lovr.load() 14 | lovr.speech.init({ model = '/path/to/model.pbmm' }) 15 | 16 | local sound = lovr.data.newSound('speech.ogg') 17 | local samples = sound:getBlob():getPointer() 18 | local count = sound:getFrameCount() 19 | 20 | print(lovr.speech.decode(samples, count)) 21 | end 22 | ``` 23 | 24 | DeepSpeech Setup 25 | --- 26 | 27 | - Download the DeepSpeech native client library. It can be found on the [DeepSpeech releases page](https://github.com/Mozilla/DeepSpeech/releases/latest) 28 | and will be named something like `native_client....tar.xz`. The most recent 29 | version tested is **0.9.3**. It should contain a `deepspeech.h` file and a platform-specific 30 | library, like a .so or .dll file. 31 | - Download the speech recognition model from the same release page. It's a huge `pbmm` file. 32 | 33 | > Note: There are multiple flavors of the native client. The `cpu` flavor runs on the CPU, the 34 | `cuda` flavor runs on the GPU with CUDA, and the `tflite` flavor can use the smaller tflite model 35 | instead of the pbmm one. It's recommended to start with the `cpu` flavor. 36 | 37 | ### Scorer 38 | 39 | You can also optionally create a thing called a "scorer package". The scorer acts as the grammar 40 | or vocabulary for the recognition, allowing it to recognize a custom set of words or phrases. 
This 41 | can improve accuracy and speed by a lot, and is useful if you only have a few words or commands that 42 | need to be detected. See [here](https://deepspeech.readthedocs.io/en/v0.9.3/Scorer.html) for 43 | instructions on generating a scorer. 44 | 45 | Building 46 | --- 47 | 48 | Once you have the DeepSpeech files downloaded, build the Lua bindings in this repository. You can 49 | download prebuilt files from the releases page (TBD, still trying to get GitHub Actions working on 50 | Windows) or build them using CMake. If you're using LÖVR you can also add this repository to the 51 | `plugins` folder and rebuild. The `DEEPSPEECH_PATH` variable needs to be set to the path to the 52 | native client. 53 | 54 | ```sh 55 | $ mkdir build 56 | $ cd build 57 | $ cmake .. -DDEEPSPEECH_PATH=/path/to/native_client 58 | $ cmake --build . 59 | ``` 60 | 61 | This should output `lua-deepspeech.dll` or `lua-deepspeech.so`. 62 | 63 | The deepspeech native_client library needs to be placed somewhere that it can be loaded at runtime 64 | and the lua-deepspeech library needs to be somewhere that it can be required by Lua. For LÖVR both 65 | of these can be put next to the lovr executable (building as a plugin will take care of this). 66 | For other engines it will probably be different. 67 | 68 | > Note: on Windows the deepspeech library has a really weird name: `libdeepspeech.so` 69 | 70 | Usage 71 | --- 72 | 73 | First, require the module: 74 | 75 | ```lua 76 | local speech = require 'lua-deepspeech' 77 | ``` 78 | 79 | It returns a table with the library's functionality. 80 | 81 | ```lua 82 | success, sampleRate = speech.init(options) 83 | ``` 84 | 85 | The library must be initialized with an options table. The table can contain the following options: 86 | 87 | - `options.model` should be a full path to the deepspeech model file (pbmm). If this file is stored 88 | in a zip archive fused to the executable it will need to be written to disk first. 
89 | - `options.scorer` is an optional path to the scorer package. 90 | - `options.beamWidth` is an optional beam width number. A higher beam width increases accuracy at 91 | the cost of performance. 92 | - `options.alpha` and `options.beta` are optional parameters for the scorer. Usually the defaults 93 | are fine. 94 | 95 | The function either returns false plus an error message or true and the audio sample rate that the 96 | model was trained against. All audio must be provided as **signed 16 bit mono** samples at this 97 | sample rate. It's almost always 16000Hz. 98 | 99 | ```lua 100 | text = speech.decode(table) 101 | text = speech.decode(pointer, count) 102 | ``` 103 | 104 | This function performs speech-to-text. A table of audio samples can be provided, or a lightuserdata 105 | pointer with a sample count. 106 | 107 | In all cases the audio data must be formatted as **signed 16 bit mono** samples at the model's 108 | sample rate. 109 | 110 | Returns a string with the decoded text. 111 | 112 | ```lua 113 | transcripts = speech.analyze(table, limit) 114 | transcripts = speech.analyze(pointer, count, limit) 115 | ``` 116 | 117 | This is the same as `decode`, but returns extra metadata about the result. The return value is a 118 | list of transcripts. Each transcript is a table with: 119 | 120 | - `confidence` is the confidence level. May be negative. Transcripts are sorted by confidence. 121 | - `tokens` a list of tokens (i.e. letters) that were decoded. 122 | - `times` a list of timestamps for each token, in seconds. 123 | 124 | `limit` can optionally be used to limit the number of transcripts returned, defaulting to 3. 125 | 126 | ```lua 127 | speech.boost(word, amount) 128 | ``` 129 | 130 | Boosts a word. 131 | 132 | ```lua 133 | speech.unboost(word) 134 | speech.unboost() 135 | ``` 136 | 137 | Unboosts a word, or unboosts all words if no arguments are provided. 
138 | 139 | ### Streams 140 | 141 | A stream object can be used to decode audio in real time as it arrives. Usually you'd use this with 142 | audio coming from a microphone. 143 | 144 | ```lua 145 | stream = speech.newStream() 146 | ``` 147 | 148 | Creates a new Stream. 149 | 150 | ```lua 151 | Stream:feed(table) 152 | Stream:feed(pointer, count) 153 | ``` 154 | 155 | Feeds audio to the Stream. Accepts the same arguments as `speech.decode`. 156 | 157 | ```lua 158 | text = Stream:decode() 159 | ``` 160 | 161 | Performs an intermediate decode on the audio data fed to the Stream, returning the decoded text. 162 | Additional audio can continue to be fed to the Stream after this function is called. 163 | 164 | ```lua 165 | transcripts = Stream:analyze() 166 | ``` 167 | 168 | Performs an intermediate analysis on the audio data fed to the Stream. See `speech.analyze`. 169 | Additional audio can continue to be fed to the Stream after this function is called. 170 | 171 | ```lua 172 | text = Stream:finish() 173 | ``` 174 | 175 | Finishes and resets the Stream, returning the final decoded text. 176 | 177 | ```lua 178 | Stream:clear() 179 | ``` 180 | 181 | Resets the Stream, erasing all audio that has been fed to it. 182 | 183 | Tips 184 | --- 185 | 186 | - Although DeepSpeech performs at realtime speeds, it's still a good idea to offload the decoding 187 | to a separate thread, especially when rendering realtime graphics alongside speech recognition. 188 | - If you are getting garbage results, ensure you're using the correct sample rate and audio format. 189 | DeepSpeech is also somewhat sensitive to background noise and low volume levels. To improve 190 | accuracy further, consider using a custom scorer. 191 | - When feeding audio to a stream, varying the size of the chunks of audio you feed can be used to 192 | trade off latency for performance. 193 | 194 | License 195 | --- 196 | 197 | MIT, see [`LICENSE`](LICENSE) for details. 
198 | -------------------------------------------------------------------------------- /lua_deepspeech.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define CHECK(c, ...) if (!(c)) { return luaL_error(L, __VA_ARGS__); } 10 | 11 | #ifdef _WIN32 12 | #define LDS_EXPORT __declspec(dllexport) 13 | #else 14 | #define LDS_EXPORT 15 | #endif 16 | 17 | static struct { 18 | ModelState* modelState; 19 | size_t bufferSize; 20 | short* buffer; 21 | } state; 22 | 23 | typedef struct { 24 | StreamingState* handle; 25 | } lds_Stream; 26 | 27 | static const short* lds_checksamples(lua_State* L, int index, size_t* count) { 28 | if (lua_istable(L, index)) { 29 | *count = lua_objlen(L, index); 30 | 31 | if (state.bufferSize < *count) { 32 | state.bufferSize += !state.bufferSize; 33 | do { state.bufferSize <<= 1; } while (state.bufferSize < *count); 34 | state.buffer = realloc(state.buffer, state.bufferSize * sizeof(short)); /* bufferSize counts samples; realloc takes bytes */ 35 | } 36 | 37 | for (size_t i = 0; i < *count; i++) { 38 | lua_rawgeti(L, index, i + 1); 39 | lua_Integer x = lua_tointeger(L, -1); 40 | lua_pop(L, 1); 41 | 42 | if (x < INT16_MIN || x > INT16_MAX) { 43 | luaL_error(L, "Sample #%d (%d) is out of range [%d,%d]", i + 1, x, INT16_MIN, INT16_MAX); 44 | } 45 | 46 | state.buffer[i] = x; 47 | } 48 | 49 | return state.buffer; 50 | } else if (lua_type(L, index) == LUA_TLIGHTUSERDATA) { 51 | return *count = luaL_checkinteger(L, index + 1), lua_touserdata(L, index); 52 | } 53 | 54 | return NULL; 55 | } 56 | 57 | static void lds_pushmetadata(lua_State* L, Metadata* metadata) { 58 | lua_createtable(L, metadata->num_transcripts, 0); 59 | for (int i = 0; i < metadata->num_transcripts; i++) { 60 | const CandidateTranscript* transcript = &metadata->transcripts[i]; 61 | lua_createtable(L, 0, 3); 62 | 63 | lua_pushnumber(L, transcript->confidence); 64 | lua_setfield(L, -2, "confidence"); 65 | 66 | 
lua_createtable(L, transcript->num_tokens, 0); 67 | for (int j = 0; j < transcript->num_tokens; j++) { 68 | lua_pushnumber(L, transcript->tokens[j].start_time); 69 | lua_rawseti(L, -2, j + 1); 70 | } 71 | lua_setfield(L, -2, "times"); 72 | 73 | lua_createtable(L, transcript->num_tokens, 0); 74 | for (int j = 0; j < transcript->num_tokens; j++) { 75 | lua_pushstring(L, transcript->tokens[j].text); 76 | lua_rawseti(L, -2, j + 1); 77 | } 78 | lua_setfield(L, -2, "tokens"); 79 | 80 | lua_rawseti(L, -2, i + 1); 81 | } 82 | } 83 | 84 | static int lds_init(lua_State* L) { 85 | luaL_argcheck(L, lua_istable(L, 1), 1, "Expected config to be a table"); 86 | 87 | if (state.modelState) { 88 | DS_FreeModel(state.modelState); 89 | state.modelState = NULL; 90 | } 91 | 92 | const char* model = NULL; 93 | const char* scorer = NULL; 94 | 95 | lua_getfield(L, 1, "model"); 96 | CHECK(lua_type(L, -1) == LUA_TSTRING, "config.model should be a string containing a path to the pbmm file"); 97 | model = lua_tostring(L, -1); 98 | lua_pop(L, 1); 99 | 100 | lua_getfield(L, 1, "scorer"); 101 | int type = lua_type(L, -1); 102 | CHECK(type == LUA_TNIL || type == LUA_TSTRING, "config.scorer should be nil or a string"); 103 | scorer = lua_tostring(L, -1); 104 | lua_pop(L, 1); 105 | 106 | int err = DS_CreateModel(model, &state.modelState); 107 | if (err) { 108 | lua_pushboolean(L, false); 109 | char* message = DS_ErrorCodeToErrorMessage(err); 110 | lua_pushstring(L, message); 111 | DS_FreeString(message); 112 | return 2; 113 | } 114 | 115 | lua_getfield(L, 1, "beamWidth"); 116 | if (!lua_isnil(L, -1)) { 117 | DS_SetModelBeamWidth(state.modelState, luaL_checkinteger(L, -1)); 118 | } 119 | lua_pop(L, 1); 120 | 121 | if (scorer) { 122 | CHECK(DS_EnableExternalScorer(state.modelState, scorer) == 0, "Failed to set scorer"); 123 | 124 | lua_getfield(L, 1, "alpha"); 125 | float alpha = lua_tonumber(L, -1); 126 | lua_pop(L, 1); 127 | 128 | lua_getfield(L, 1, "beta"); 129 | float beta = lua_tonumber(L, -1); 
130 | lua_pop(L, 1); 131 | 132 | if (alpha != 0.f || beta != 0.f) { 133 | CHECK(DS_SetScorerAlphaBeta(state.modelState, alpha, beta) == 0, "Failed to set scorer alpha/beta"); 134 | } 135 | } 136 | 137 | lua_pushboolean(L, true); 138 | lua_pushinteger(L, DS_GetModelSampleRate(state.modelState)); 139 | return 2; 140 | } 141 | 142 | static int lds_destroy(lua_State* L) { 143 | if (state.modelState) { 144 | DS_FreeModel(state.modelState); 145 | state.modelState = NULL; 146 | } 147 | state.bufferSize = 0; 148 | free(state.buffer); 149 | return 0; 150 | } 151 | 152 | static int lds_decode(lua_State* L) { 153 | size_t sampleCount; 154 | CHECK(state.modelState != NULL, "DeepSpeech is not initialized"); 155 | const short* samples = lds_checksamples(L, 1, &sampleCount); 156 | CHECK(samples != NULL, "Expected a table or lightuserdata pointer for audio sample data"); 157 | char* text = DS_SpeechToText(state.modelState, samples, sampleCount); 158 | lua_pushstring(L, text); 159 | DS_FreeString(text); 160 | return 1; 161 | } 162 | 163 | static int lds_analyze(lua_State* L) { 164 | size_t sampleCount; 165 | CHECK(state.modelState != NULL, "DeepSpeech is not initialized"); 166 | const short* samples = lds_checksamples(L, 1, &sampleCount); 167 | CHECK(samples != NULL, "Expected a table or lightuserdata pointer for audio sample data"); 168 | uint32_t limit = luaL_optinteger(L, lua_istable(L, 1) ? 
2 : 3, 3); 169 | Metadata* metadata = DS_SpeechToTextWithMetadata(state.modelState, samples, sampleCount, limit); 170 | lds_pushmetadata(L, metadata); 171 | DS_FreeMetadata(metadata); 172 | return 1; 173 | } 174 | 175 | static int lds_boost(lua_State* L) { 176 | CHECK(state.modelState != NULL, "DeepSpeech is not initialized"); 177 | const char* word = luaL_checkstring(L, 1); 178 | float boost = luaL_checknumber(L, 2); 179 | DS_AddHotWord(state.modelState, word, boost); 180 | return 0; 181 | } 182 | 183 | static int lds_unboost(lua_State* L) { 184 | CHECK(state.modelState != NULL, "DeepSpeech is not initialized"); 185 | const char* word = lua_tostring(L, 1); 186 | if (word) { 187 | DS_EraseHotWord(state.modelState, word); 188 | } else { 189 | DS_ClearHotWords(state.modelState); 190 | } 191 | return 0; 192 | } 193 | 194 | static int lds_newStream(lua_State* L) { 195 | CHECK(state.modelState != NULL, "DeepSpeech is not initialized"); 196 | lds_Stream* stream = (lds_Stream*) lua_newuserdata(L, sizeof(lds_Stream)); 197 | CHECK(DS_CreateStream(state.modelState, &stream->handle) == 0, "Could not create stream"); 198 | luaL_getmetatable(L, "lds_Stream"); 199 | lua_setmetatable(L, -2); 200 | return 1; 201 | } 202 | 203 | static int lds_stream_feed(lua_State* L) { 204 | size_t sampleCount; 205 | lds_Stream* stream = (lds_Stream*) luaL_checkudata(L, 1, "lds_Stream"); 206 | const short* samples = lds_checksamples(L, 2, &sampleCount); 207 | CHECK(samples != NULL, "Expected a table or lightuserdata pointer for audio sample data"); 208 | DS_FeedAudioContent(stream->handle, samples, sampleCount); 209 | return 0; 210 | } 211 | 212 | static int lds_stream_decode(lua_State* L) { 213 | lds_Stream* stream = (lds_Stream*) luaL_checkudata(L, 1, "lds_Stream"); 214 | char* text = DS_IntermediateDecode(stream->handle); 215 | lua_pushstring(L, text); 216 | DS_FreeString(text); 217 | return 1; 218 | } 219 | 220 | static int lds_stream_analyze(lua_State* L) { 221 | lds_Stream* stream = 
(lds_Stream*) luaL_checkudata(L, 1, "lds_Stream"); 222 | uint32_t limit = luaL_optinteger(L, 2, 3); 223 | Metadata* metadata = DS_IntermediateDecodeWithMetadata(stream->handle, limit); 224 | lds_pushmetadata(L, metadata); 225 | DS_FreeMetadata(metadata); 226 | return 1; 227 | } 228 | 229 | static int lds_stream_finish(lua_State* L) { 230 | lds_Stream* stream = (lds_Stream*) luaL_checkudata(L, 1, "lds_Stream"); 231 | char* text = DS_FinishStream(stream->handle); 232 | lua_pushstring(L, text); 233 | DS_FreeString(text); 234 | DS_CreateStream(state.modelState, &stream->handle); 235 | return 1; 236 | } 237 | 238 | static int lds_stream_clear(lua_State* L) { 239 | lds_Stream* stream = (lds_Stream*) luaL_checkudata(L, 1, "lds_Stream"); 240 | DS_FreeStream(stream->handle); 241 | DS_CreateStream(state.modelState, &stream->handle); 242 | return 0; 243 | } 244 | 245 | static int lds_stream_destroy(lua_State* L) { 246 | lds_Stream* stream = (lds_Stream*) luaL_checkudata(L, 1, "lds_Stream"); 247 | DS_FreeStream(stream->handle); 248 | return 0; 249 | } 250 | 251 | static const luaL_Reg lds_api[] = { 252 | { "init", lds_init }, 253 | { "decode", lds_decode }, 254 | { "analyze", lds_analyze }, 255 | { "boost", lds_boost }, 256 | { "unboost", lds_unboost }, 257 | { "newStream", lds_newStream }, 258 | { NULL, NULL }, 259 | }; 260 | 261 | static const luaL_Reg lds_stream_api[] = { 262 | { "feed", lds_stream_feed }, 263 | { "decode", lds_stream_decode }, 264 | { "analyze", lds_stream_analyze }, 265 | { "finish", lds_stream_finish }, 266 | { "clear", lds_stream_clear }, 267 | { "__gc", lds_stream_destroy }, 268 | { NULL, NULL } 269 | }; 270 | 271 | LDS_EXPORT int luaopen_deepspeech(lua_State* L) { 272 | lua_newtable(L); 273 | luaL_register(L, NULL, lds_api); 274 | 275 | // Add sentinel userdata to free the model state on GC 276 | lua_newuserdata(L, sizeof(void*)); 277 | lua_createtable(L, 0, 1); 278 | lua_pushcfunction(L, lds_destroy); 279 | lua_setfield(L, -2, "__gc"); 280 | 
lua_setmetatable(L, -2); 281 | lua_setfield(L, -2, ""); 282 | 283 | if (luaL_newmetatable(L, "lds_Stream")) { 284 | lua_pushvalue(L, -1); 285 | lua_setfield(L, -2, "__index"); 286 | luaL_register(L, NULL, lds_stream_api); 287 | lua_pop(L, 1); 288 | } else { 289 | return luaL_error(L, "Could not register lds_Stream metatable!"); 290 | } 291 | 292 | return 1; 293 | } 294 | -------------------------------------------------------------------------------- /main.lua: -------------------------------------------------------------------------------- 1 | lovr.speech = require 'lua-deepspeech' 2 | 3 | function lovr.load() 4 | lovr.speech.init({ 5 | model = lovr.filesystem.getSource() .. '/deepspeech-0.9.3-models.pbmm' 6 | }) 7 | 8 | -- Decode a sound file if provided 9 | if arg[1] then 10 | local sound = lovr.data.newSound(arg[1]) 11 | local count = sound:getFrameCount() 12 | local samples = sound:getBlob():getPointer() 13 | local text = lovr.speech.decode(samples, count) 14 | print(text) 15 | lovr.event.quit() 16 | return 17 | end 18 | 19 | -- Otherwise set up microphone capture and feed audio to a speech decoder stream 20 | sink = lovr.data.newSound(4096, 'f32', 'mono', 16000) 21 | lovr.audio.setDevice('capture', 'default', sink) 22 | lovr.audio.start('capture') 23 | stream = lovr.speech.newStream() 24 | end 25 | 26 | function lovr.update(dt) 27 | if sink:getFrameCount() > 1024 then 28 | stream:feed(sink:getFrames()) 29 | print(stream:decode()) 30 | end 31 | end 32 | --------------------------------------------------------------------------------