├── .github └── workflows │ └── workflow.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── lua_deepspeech.c └── main.lua /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | name: workflow 2 | 3 | on: push 4 | 5 | jobs: 6 | build: 7 | strategy: 8 | matrix: 9 | os: [ubuntu-latest, windows-latest] 10 | include: 11 | - os: ubuntu-latest 12 | deepspeech-os: linux 13 | toolchain: '' 14 | - os: windows-latest 15 | deepspeech-os: win 16 | toolchain: -DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake 17 | 18 | runs-on: ${{matrix.os}} 19 | 20 | env: 21 | BUILD_TYPE: Release 22 | DEEPSPEECH_URL: https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/native_client.amd64.cpu.${{matrix.deepspeech-os}}.tar.xz 23 | 24 | steps: 25 | - uses: actions/checkout@v2 26 | 27 | - name: Install Lua 28 | if: ${{matrix.os == 'ubuntu-latest'}} 29 | run: sudo apt install liblua5.1-dev 30 | 31 | - name: Install Lua 32 | if: ${{matrix.os == 'windows-latest'}} 33 | run: vcpkg install lua 34 | 35 | - name: Create Build Environment 36 | run: cmake -E make_directory ${{github.workspace}}/build ${{github.workspace}}/build/deepspeech 37 | 38 | - name: Download DeepSpeech 39 | shell: bash 40 | working-directory: ${{github.workspace}}/build/deepspeech 41 | run: curl -sL ${DEEPSPEECH_URL} | tar xJ 42 | 43 | - name: Configure CMake 44 | working-directory: ${{github.workspace}}/build 45 | run: cmake .. -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DDEEPSPEECH_PATH=${{github.workspace}}/build/deepspeech ${{matrix.toolchain}} 46 | 47 | - name: Build 48 | working-directory: ${{github.workspace}}/build 49 | run: cmake --build . 
--config ${{env.BUILD_TYPE}} 50 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1.0) 2 | project(lua-deepspeech) 3 | 4 | add_library(lua-deepspeech MODULE lua_deepspeech.c) 5 | set_target_properties(lua-deepspeech PROPERTIES PREFIX "") 6 | 7 | if(DEEPSPEECH_PATH) 8 | add_library(deepspeech SHARED IMPORTED GLOBAL) 9 | target_include_directories(lua-deepspeech PRIVATE "${DEEPSPEECH_PATH}") 10 | target_link_libraries(lua-deepspeech PRIVATE deepspeech) 11 | if(WIN32) 12 | set_target_properties(deepspeech PROPERTIES IMPORTED_IMPLIB "${DEEPSPEECH_PATH}/libdeepspeech.so.if.lib") 13 | set_target_properties(deepspeech PROPERTIES IMPORTED_LOCATION "${DEEPSPEECH_PATH}/libdeepspeech.so") 14 | else() 15 | set_target_properties(deepspeech PROPERTIES IMPORTED_LOCATION "${DEEPSPEECH_PATH}/libdeepspeech.so") 16 | endif() 17 | else() 18 | message(FATAL_ERROR "Deepspeech path not found. 
Set DEEPSPEECH_PATH to the path to the deepspeech native client library.") 19 | endif() 20 | 21 | if(LOVR) 22 | set(LOVR_PLUGIN_TARGETS lua-deepspeech deepspeech) 23 | else() 24 | include(FindLua) 25 | find_package(Lua REQUIRED) 26 | target_include_directories(lua-deepspeech PRIVATE "${LUA_INCLUDE_DIR}") 27 | target_link_libraries(lua-deepspeech PRIVATE "${LUA_LIBRARIES}") 28 | endif() 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021 Bjorn Swenson 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | lua-deepspeech 2 | === 3 | 4 | Lua bindings for [DeepSpeech](https://github.com/Mozilla/DeepSpeech), an open source speech 5 | recognition library. Intended for use with [LÖVR](https://lovr.org) and [LÖVE](https://love2d.org), 6 | but it should work with any Lua program that has audio samples in a table or a lightuserdata. 7 | 8 | Here's a simple example of using it to do speech-to-text on an audio file: 9 | 10 | ```lua 11 | lovr.speech = require 'lua-deepspeech' 12 | 13 | function lovr.load() 14 | lovr.speech.init({ model = '/path/to/model.pbmm' }) 15 | 16 | local sound = lovr.data.newSound('speech.ogg') 17 | local samples = sound:getBlob():getPointer() 18 | local count = sound:getFrameCount() 19 | 20 | print(lovr.speech.decode(samples, count)) 21 | end 22 | ``` 23 | 24 | DeepSpeech Setup 25 | --- 26 | 27 | - Download the DeepSpeech native client library. It can be found on the [DeepSpeech releases page](https://github.com/Mozilla/DeepSpeech/releases/latest) 28 | and will be named something like `native_client....tar.xz`. The most recent 29 | version tested is **0.9.3**. It should contain a `deepspeech.h` file and a platform-specific 30 | library, like a .so or .dll file. 31 | - Download the speech recognition model from the same release page. It's a huge `pbmm` file. 32 | 33 | > Note: There are multiple flavors of the native client. The `cpu` flavor runs on the CPU, the 34 | `cuda` flavor runs on the GPU with CUDA, and the `tflite` flavor can use the smaller tflite model 35 | instead of the pbmm one. It's recommended to start with the `cpu` flavor. 36 | 37 | ### Scorer 38 | 39 | You can also optionally create a thing called a "scorer package". The scorer acts as the grammar 40 | or vocabulary for the recognition, allowing it to recognize a custom set of words or phrases. 
This 41 | can improve accuracy and speed by a lot, and is useful if you only have a few words or commands that 42 | need to be detected. See [here](https://deepspeech.readthedocs.io/en/v0.9.3/Scorer.html) for 43 | instructions on generating a scorer. 44 | 45 | Building 46 | --- 47 | 48 | Once you have the DeepSpeech files downloaded, build the Lua bindings in this repository. You can 49 | download prebuilt files from the releases page (TBD, still trying to get GitHub Actions working on 50 | Windows) or build them using CMake. If you're using LÖVR you can also add this repository to the 51 | `plugins` folder and rebuild. The `DEEPSPEECH_PATH` variable needs to be set to the path to the 52 | native client. 53 | 54 | ```sh 55 | $ mkdir build 56 | $ cd build 57 | $ cmake .. -DDEEPSPEECH_PATH=/path/to/native_client 58 | $ cmake --build . 59 | ``` 60 | 61 | This should output `lua-deepspeech.dll` or `lua-deepspeech.so`. 62 | 63 | The deepspeech native_client library needs to be placed somewhere that it can be loaded at runtime 64 | and the lua-deepspeech library needs to be somewhere that it can be required by Lua. For LÖVR both 65 | of these can be put next to the lovr executable (building as a plugin will take care of this). 66 | For other engines it will probably be different. 67 | 68 | > Note: on Windows the deepspeech library has a really weird name: `libdeepspeech.so` 69 | 70 | Usage 71 | --- 72 | 73 | First, require the module: 74 | 75 | ```lua 76 | local speech = require 'lua-deepspeech' 77 | ``` 78 | 79 | It returns a table with the library's functionality. 80 | 81 | ```lua 82 | success, sampleRate = speech.init(options) 83 | ``` 84 | 85 | The library must be initialized with an options table. The table can contain the following options: 86 | 87 | - `options.model` should be a full path to the deepspeech model file (pbmm). If this file is stored 88 | in a zip archive fused to the executable it will need to be written to disk first. 
89 | - `options.scorer` is an optional path to the scorer package. 90 | - `options.beamWidth` is an optional beam width number. A higher beam width increases accuracy at 91 | the cost of performance. 92 | - `options.alpha` and `options.beta` are optional parameters for the scorer. Usually the defaults 93 | are fine. 94 | 95 | The function either returns false plus an error message or true and the audio sample rate that the 96 | model was trained against. All audio must be provided as **signed 16 bit mono** samples at this 97 | sample rate. It's almost always 16000Hz. 98 | 99 | ```lua 100 | text = speech.decode(table) 101 | text = speech.decode(pointer, count) 102 | ``` 103 | 104 | This function performs speech-to-text. A table of audio samples can be provided, or a lightuserdata 105 | pointer with a sample count. 106 | 107 | In all cases the audio data must be formatted as **signed 16 bit mono** samples at the model's 108 | sample rate. 109 | 110 | Returns a string with the decoded text. 111 | 112 | ```lua 113 | transcripts = speech.analyze(table, limit) 114 | transcripts = speech.analyze(pointer, count, limit) 115 | ``` 116 | 117 | This is the same as `decode`, but returns extra metadata about the result. The return value is a 118 | list of transcripts. Each transcript is a table with: 119 | 120 | - `confidence` is the confidence level. May be negative. Transcripts are sorted by confidence. 121 | - `tokens` a list of tokens (i.e. letters) that were decoded. 122 | - `times` a list of timestamps for each token, in seconds. 123 | 124 | `limit` can optionally be used to limit the number of transcripts returned, defaulting to 3. 125 | 126 | ```lua 127 | speech.boost(word, amount) 128 | ``` 129 | 130 | Boosts a word. 131 | 132 | ```lua 133 | speech.unboost(word) 134 | speech.unboost() 135 | ``` 136 | 137 | Unboosts a word, or unboosts all words if no arguments are provided. 
138 | 139 | ### Streams 140 | 141 | A stream object can be used to decode audio in real time as it arrives. Usually you'd use this with 142 | audio coming from a microphone. 143 | 144 | ```lua 145 | stream = speech.newStream() 146 | ``` 147 | 148 | Creates a new Stream. 149 | 150 | ```lua 151 | Stream:feed(table) 152 | Stream:feed(pointer, count) 153 | ``` 154 | 155 | Feeds audio to the Stream. Accepts the same arguments as `speech.decode`. 156 | 157 | ```lua 158 | text = Stream:decode() 159 | ``` 160 | 161 | Performs an intermediate decode on the audio data fed to the Stream, returning the decoded text. 162 | Additional audio can continue to be fed to the Stream after this function is called. 163 | 164 | ```lua 165 | transcripts = Stream:analyze() 166 | ``` 167 | 168 | Performs an intermediate analysis on the audio data fed to the Stream. See `speech.analyze`. 169 | Additional audio can continue to be fed to the Stream after this function is called. 170 | 171 | ```lua 172 | text = Stream:finish() 173 | ``` 174 | 175 | Finishes and resets the Stream, returning the final decoded text. 176 | 177 | ```lua 178 | Stream:clear() 179 | ``` 180 | 181 | Resets the Stream, erasing all audio that has been fed to it. 182 | 183 | Tips 184 | --- 185 | 186 | - Although DeepSpeech performs at realtime speeds, it's still a good idea to offload the decoding 187 | to a separate thread, especially when rendering realtime graphics alongside speech recognition. 188 | - If you are getting garbage results, ensure you're using the correct sample rate and audio format. 189 | DeepSpeech is also somewhat sensitive to background noise and low volume levels. To improve 190 | accuracy further, consider using a custom scorer. 191 | - When feeding audio to a stream, varying the size of the chunks of audio you feed can be used to 192 | trade off latency for performance. 193 | 194 | License 195 | --- 196 | 197 | MIT, see [`LICENSE`](LICENSE) for details. 
198 | -------------------------------------------------------------------------------- /lua_deepspeech.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define CHECK(c, ...) if (!(c)) { return luaL_error(L, __VA_ARGS__); } 10 | 11 | #ifdef _WIN32 12 | #define LDS_EXPORT __declspec(dllexport) 13 | #else 14 | #define LDS_EXPORT 15 | #endif 16 | 17 | static struct { 18 | ModelState* modelState; 19 | size_t bufferSize; 20 | short* buffer; 21 | } state; 22 | 23 | typedef struct { 24 | StreamingState* handle; 25 | } lds_Stream; 26 | 27 | static const short* lds_checksamples(lua_State* L, int index, size_t* count) { 28 | if (lua_istable(L, index)) { 29 | *count = lua_objlen(L, index); 30 | 31 | if (state.bufferSize < *count) { 32 | state.bufferSize += !state.bufferSize; 33 | do { state.bufferSize <<= 1; } while (state.bufferSize < *count); 34 | state.buffer = realloc(state.buffer, state.bufferSize * sizeof(short)); /* bufferSize counts samples; realloc takes bytes */ 35 | } 36 | 37 | for (size_t i = 0; i < *count; i++) { 38 | lua_rawgeti(L, index, i + 1); 39 | lua_Integer x = lua_tointeger(L, -1); 40 | lua_pop(L, 1); 41 | 42 | if (x < INT16_MIN || x > INT16_MAX) { 43 | luaL_error(L, "Sample #%d (%d) is out of range [%d,%d]", i + 1, x, INT16_MIN, INT16_MAX); 44 | } 45 | 46 | state.buffer[i] = x; 47 | } 48 | 49 | return state.buffer; 50 | } else if (lua_type(L, index) == LUA_TLIGHTUSERDATA) { 51 | return *count = luaL_checkinteger(L, index + 1), lua_touserdata(L, index); 52 | } 53 | 54 | return NULL; 55 | } 56 | 57 | static void lds_pushmetadata(lua_State* L, Metadata* metadata) { 58 | lua_createtable(L, metadata->num_transcripts, 0); 59 | for (int i = 0; i < metadata->num_transcripts; i++) { 60 | const CandidateTranscript* transcript = &metadata->transcripts[i]; 61 | lua_createtable(L, 0, 3); 62 | 63 | lua_pushnumber(L, transcript->confidence); 64 | lua_setfield(L, -2, "confidence"); 65 | 66 | 
lua_createtable(L, transcript->num_tokens, 0); 67 | for (int j = 0; j < transcript->num_tokens; j++) { 68 | lua_pushnumber(L, transcript->tokens[j].start_time); 69 | lua_rawseti(L, -2, j + 1); 70 | } 71 | lua_setfield(L, -2, "times"); 72 | 73 | lua_createtable(L, transcript->num_tokens, 0); 74 | for (int j = 0; j < transcript->num_tokens; j++) { 75 | lua_pushstring(L, transcript->tokens[j].text); 76 | lua_rawseti(L, -2, j + 1); 77 | } 78 | lua_setfield(L, -2, "tokens"); 79 | 80 | lua_rawseti(L, -2, i + 1); 81 | } 82 | } 83 | 84 | static int lds_init(lua_State* L) { 85 | luaL_argcheck(L, lua_istable(L, 1), 1, "Expected config to be a table"); 86 | 87 | if (state.modelState) { 88 | DS_FreeModel(state.modelState); 89 | state.modelState = NULL; 90 | } 91 | 92 | const char* model = NULL; 93 | const char* scorer = NULL; 94 | 95 | lua_getfield(L, 1, "model"); 96 | CHECK(lua_type(L, -1) == LUA_TSTRING, "config.model should be a string containing a path to the pbmm file"); 97 | model = lua_tostring(L, -1); 98 | lua_pop(L, 1); 99 | 100 | lua_getfield(L, 1, "scorer"); 101 | int type = lua_type(L, -1); 102 | CHECK(type == LUA_TNIL || type == LUA_TSTRING, "config.scorer should be nil or a string"); 103 | scorer = lua_tostring(L, -1); 104 | lua_pop(L, 1); 105 | 106 | int err = DS_CreateModel(model, &state.modelState); 107 | if (err) { 108 | lua_pushboolean(L, false); 109 | char* message = DS_ErrorCodeToErrorMessage(err); 110 | lua_pushstring(L, message); 111 | DS_FreeString(message); 112 | return 2; 113 | } 114 | 115 | lua_getfield(L, 1, "beamWidth"); 116 | if (!lua_isnil(L, -1)) { 117 | DS_SetModelBeamWidth(state.modelState, luaL_checkinteger(L, -1)); 118 | } 119 | lua_pop(L, 1); 120 | 121 | if (scorer) { 122 | CHECK(DS_EnableExternalScorer(state.modelState, scorer) == 0, "Failed to set scorer"); 123 | 124 | lua_getfield(L, 1, "alpha"); 125 | float alpha = lua_tonumber(L, -1); 126 | lua_pop(L, 1); 127 | 128 | lua_getfield(L, 1, "beta"); 129 | float beta = lua_tonumber(L, -1); 
130 | lua_pop(L, 1); 131 | 132 | if (alpha != 0.f || beta != 0.f) { 133 | CHECK(DS_SetScorerAlphaBeta(state.modelState, alpha, beta) == 0, "Failed to set scorer alpha/beta"); 134 | } 135 | } 136 | 137 | lua_pushboolean(L, true); 138 | lua_pushinteger(L, DS_GetModelSampleRate(state.modelState)); 139 | return 2; 140 | } 141 | 142 | static int lds_destroy(lua_State* L) { 143 | if (state.modelState) { 144 | DS_FreeModel(state.modelState); 145 | state.modelState = NULL; 146 | } 147 | state.bufferSize = 0; 148 | free(state.buffer); 149 | return 0; 150 | } 151 | 152 | static int lds_decode(lua_State* L) { 153 | size_t sampleCount; 154 | CHECK(state.modelState != NULL, "DeepSpeech is not initialized"); 155 | const short* samples = lds_checksamples(L, 1, &sampleCount); 156 | CHECK(samples != NULL, "Expected a table or lightuserdata pointer for audio sample data"); 157 | char* text = DS_SpeechToText(state.modelState, samples, sampleCount); 158 | lua_pushstring(L, text); 159 | DS_FreeString(text); 160 | return 1; 161 | } 162 | 163 | static int lds_analyze(lua_State* L) { 164 | size_t sampleCount; 165 | CHECK(state.modelState != NULL, "DeepSpeech is not initialized"); 166 | const short* samples = lds_checksamples(L, 1, &sampleCount); 167 | CHECK(samples != NULL, "Expected a table or lightuserdata pointer for audio sample data"); 168 | uint32_t limit = luaL_optinteger(L, lua_istable(L, 1) ? 
2 : 3, 3); 169 | Metadata* metadata = DS_SpeechToTextWithMetadata(state.modelState, samples, sampleCount, limit); 170 | lds_pushmetadata(L, metadata); 171 | DS_FreeMetadata(metadata); 172 | return 1; 173 | } 174 | 175 | static int lds_boost(lua_State* L) { 176 | CHECK(state.modelState != NULL, "DeepSpeech is not initialized"); 177 | const char* word = luaL_checkstring(L, 1); 178 | float boost = luaL_checknumber(L, 2); 179 | DS_AddHotWord(state.modelState, word, boost); 180 | return 0; 181 | } 182 | 183 | static int lds_unboost(lua_State* L) { 184 | CHECK(state.modelState != NULL, "DeepSpeech is not initialized"); 185 | const char* word = lua_tostring(L, 1); 186 | if (word) { 187 | DS_EraseHotWord(state.modelState, word); 188 | } else { 189 | DS_ClearHotWords(state.modelState); 190 | } 191 | return 0; 192 | } 193 | 194 | static int lds_newStream(lua_State* L) { 195 | CHECK(state.modelState != NULL, "DeepSpeech is not initialized"); 196 | lds_Stream* stream = (lds_Stream*) lua_newuserdata(L, sizeof(lds_Stream)); 197 | CHECK(DS_CreateStream(state.modelState, &stream->handle) == 0, "Could not create stream"); 198 | luaL_getmetatable(L, "lds_Stream"); 199 | lua_setmetatable(L, -2); 200 | return 1; 201 | } 202 | 203 | static int lds_stream_feed(lua_State* L) { 204 | size_t sampleCount; 205 | lds_Stream* stream = (lds_Stream*) luaL_checkudata(L, 1, "lds_Stream"); 206 | const short* samples = lds_checksamples(L, 2, &sampleCount); 207 | CHECK(samples != NULL, "Expected a table or lightuserdata pointer for audio sample data"); 208 | DS_FeedAudioContent(stream->handle, samples, sampleCount); 209 | return 0; 210 | } 211 | 212 | static int lds_stream_decode(lua_State* L) { 213 | lds_Stream* stream = (lds_Stream*) luaL_checkudata(L, 1, "lds_Stream"); 214 | char* text = DS_IntermediateDecode(stream->handle); 215 | lua_pushstring(L, text); 216 | DS_FreeString(text); 217 | return 1; 218 | } 219 | 220 | static int lds_stream_analyze(lua_State* L) { 221 | lds_Stream* stream = 
(lds_Stream*) luaL_checkudata(L, 1, "lds_Stream"); 222 | uint32_t limit = luaL_optinteger(L, 2, 3); 223 | Metadata* metadata = DS_IntermediateDecodeWithMetadata(stream->handle, limit); 224 | lds_pushmetadata(L, metadata); 225 | DS_FreeMetadata(metadata); 226 | return 1; 227 | } 228 | 229 | static int lds_stream_finish(lua_State* L) { 230 | lds_Stream* stream = (lds_Stream*) luaL_checkudata(L, 1, "lds_Stream"); 231 | char* text = DS_FinishStream(stream->handle); 232 | lua_pushstring(L, text); 233 | DS_FreeString(text); 234 | DS_CreateStream(state.modelState, &stream->handle); 235 | return 1; 236 | } 237 | 238 | static int lds_stream_clear(lua_State* L) { 239 | lds_Stream* stream = (lds_Stream*) luaL_checkudata(L, 1, "lds_Stream"); 240 | DS_FreeStream(stream->handle); 241 | DS_CreateStream(state.modelState, &stream->handle); 242 | return 0; 243 | } 244 | 245 | static int lds_stream_destroy(lua_State* L) { 246 | lds_Stream* stream = (lds_Stream*) luaL_checkudata(L, 1, "lds_Stream"); 247 | DS_FreeStream(stream->handle); 248 | return 0; 249 | } 250 | 251 | static const luaL_Reg lds_api[] = { 252 | { "init", lds_init }, 253 | { "decode", lds_decode }, 254 | { "analyze", lds_analyze }, 255 | { "boost", lds_boost }, 256 | { "unboost", lds_unboost }, 257 | { "newStream", lds_newStream }, 258 | { NULL, NULL }, 259 | }; 260 | 261 | static const luaL_Reg lds_stream_api[] = { 262 | { "feed", lds_stream_feed }, 263 | { "decode", lds_stream_decode }, 264 | { "analyze", lds_stream_analyze }, 265 | { "finish", lds_stream_finish }, 266 | { "clear", lds_stream_clear }, 267 | { "__gc", lds_stream_destroy }, 268 | { NULL, NULL } 269 | }; 270 | 271 | LDS_EXPORT int luaopen_deepspeech(lua_State* L) { 272 | lua_newtable(L); 273 | luaL_register(L, NULL, lds_api); 274 | 275 | // Add sentinel userdata to free the model state on GC 276 | lua_newuserdata(L, sizeof(void*)); 277 | lua_createtable(L, 0, 1); 278 | lua_pushcfunction(L, lds_destroy); 279 | lua_setfield(L, -2, "__gc"); 280 | 
lua_setmetatable(L, -2); 281 | lua_setfield(L, -2, ""); 282 | 283 | if (luaL_newmetatable(L, "lds_Stream")) { 284 | lua_pushvalue(L, -1); 285 | lua_setfield(L, -2, "__index"); 286 | luaL_register(L, NULL, lds_stream_api); 287 | lua_pop(L, 1); 288 | } else { 289 | return luaL_error(L, "Could not register lds_Stream metatable!"); 290 | } 291 | 292 | return 1; 293 | } 294 | -------------------------------------------------------------------------------- /main.lua: -------------------------------------------------------------------------------- 1 | lovr.speech = require 'lua-deepspeech' 2 | 3 | function lovr.load() 4 | lovr.speech.init({ 5 | model = lovr.filesystem.getSource() .. '/deepspeech-0.9.3-models.pbmm' 6 | }) 7 | 8 | -- Decode a sound file if provided 9 | if arg[1] then 10 | local sound = lovr.data.newSound(arg[1]) 11 | local count = sound:getFrameCount() 12 | local samples = sound:getBlob():getPointer() 13 | local text = lovr.speech.decode(samples, count) 14 | print(text) 15 | lovr.event.quit() 16 | return 17 | end 18 | 19 | -- Otherwise set up microphone capture and feed audio to a speech decoder stream 20 | sink = lovr.data.newSound(4096, 'f32', 'mono', 16000) 21 | lovr.audio.setDevice('capture', 'default', sink) 22 | lovr.audio.start('capture') 23 | stream = lovr.speech.newStream() 24 | end 25 | 26 | function lovr.update(dt) 27 | if sink:getFrameCount() > 1024 then 28 | stream:feed(sink:getFrames()) 29 | print(stream:decode()) 30 | end 31 | end 32 | --------------------------------------------------------------------------------