├── .github
│   └── workflows
│       └── ci.yml
├── .gitignore
├── CHANGELOG.md
├── Cargo.toml
├── LICENSE
├── README.md
├── crates
│   ├── vosk-sys
│   │   ├── CHANGELOG.md
│   │   ├── Cargo.toml
│   │   ├── README.md
│   │   └── src
│   │       └── lib.rs
│   └── vosk
│       ├── CHANGELOG.md
│       ├── Cargo.toml
│       ├── README.md
│       ├── examples
│       │   ├── grammar.rs
│       │   ├── microphone.rs
│       │   ├── read_wav.rs
│       │   └── speaker_model.rs
│       └── src
│           ├── gpu.rs
│           ├── lib.rs
│           ├── log.rs
│           ├── models
│           │   ├── batch.rs
│           │   ├── mod.rs
│           │   └── sequential.rs
│           └── recognition
│               ├── batch.rs
│               ├── errors.rs
│               ├── mod.rs
│               ├── results.rs
│               └── sequential.rs
├── flake.lock
├── flake.nix
└── nix
    └── shells
        └── default
            └── default.nix

--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 | 
3 | on:
4 |   push:
5 |     branches:
6 |       - main
7 |   pull_request:
8 |     branches:
9 |       - main
10 | 
11 | env:
12 |   CARGO_TERM_COLOR: always
13 |   NIX_DEV: nix develop --command
14 |   CARGO_FLAGS: --all-targets --all-features
15 | 
16 | jobs:
17 |   rustfmt:
18 |     runs-on: ubuntu-latest
19 |     steps:
20 |       - name: Checkout repository
21 |         uses: actions/checkout@v4
22 |       - name: Install Nix
23 |         uses: DeterminateSystems/nix-installer-action@v14
24 |       - name: Cache Nix store
25 |         uses: DeterminateSystems/magic-nix-cache-action@v8
26 |       - name: Run rustfmt
27 |         run: $NIX_DEV cargo fmt --all -- --check
28 | 
29 |   clippy:
30 |     runs-on: ubuntu-latest
31 |     steps:
32 |       - name: Checkout repository
33 |         uses: actions/checkout@v4
34 |       - name: Install Nix
35 |         uses: DeterminateSystems/nix-installer-action@v14
36 |       - name: Cache Nix store
37 |         uses: DeterminateSystems/magic-nix-cache-action@v8
38 |       - name: Run clippy
39 |         run: $NIX_DEV cargo clippy --workspace $CARGO_FLAGS -- -D warnings
40 | 
41 |   build:
42 |     runs-on: ubuntu-latest
43 |     steps:
44 |       - name: Checkout repository
45 |         uses: actions/checkout@v4
46 |       - name: Install Nix
47 |         uses: DeterminateSystems/nix-installer-action@v14
48 |       - name: Cache Nix store
49 |         uses: DeterminateSystems/magic-nix-cache-action@v8
50 |       - name: Build
51 |         run: $NIX_DEV cargo build $CARGO_FLAGS
52 |       - name: Run tests
53 |         run: $NIX_DEV cargo test $CARGO_FLAGS
54 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /Cargo.lock
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # 0.3.1
2 | * Fix flag-enabled items not showing up on [docs.rs](https://docs.rs/vosk/0.3.0/vosk/index.html).
3 | 
4 | # 0.3.0
5 | * Add support for Batch recognition ([PR](https://github.com/Bear-03/vosk-rs/pull/8)).
6 | * [BREAKING] Redesign `LogLevel` to adequately represent Kaldi log levels ([PR](https://github.com/Bear-03/vosk-rs/pull/9)).
7 | * [BREAKING] `Recognizer::accept_waveform` methods now return `Result<T, AcceptWaveformError>` (previously `T`).
8 |   Vosk takes the buffer length as an `i32` so the user should be able to handle errors that arise due to the
9 |   buffer being longer than `i32::MAX`.
10 | * [BREAKING] `Model::find_word` now returns `Option<u32>` (previously `Option<i32>`) to adjust it to the values
11 |   that Vosk can return.
12 | 
13 | # 0.2.0
14 | * Documentation fixes.
15 | * Loosen bounds for Recognizer::new_with_grammar.
16 | * [BREAKING] Extra double quotes are no longer needed for phrases in `Recognizer::new_with_grammar`.
17 | 
18 | # 0.1.0
19 | * First release. 
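To make the 0.3.0 `accept_waveform` change above concrete, here is a minimal sketch (the sample buffer is a hypothetical stand-in):

```rust
use vosk::{DecodingState, Model, Recognizer};

fn main() {
    let model = Model::new("/path/to/model").expect("Could not create the model");
    let mut recognizer =
        Recognizer::new(&model, 16000.0).expect("Could not create the recognizer");

    let samples: Vec<i16> = vec![0; 8000]; // stand-in audio
    // Since 0.3.0 this returns Result<DecodingState, AcceptWaveformError>,
    // because Vosk takes the buffer length as an i32: a buffer longer than
    // i32::MAX is reported as an error instead of being silently truncated.
    match recognizer.accept_waveform(&samples) {
        Ok(DecodingState::Finalized) => println!("{:#?}", recognizer.result()),
        Ok(_) => println!("{:#?}", recognizer.partial_result()),
        Err(e) => eprintln!("{e}"),
    }
}
```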
20 | 21 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "crates/*" 4 | ] 5 | resolver = "2" 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Bear_03 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Vosk 2 | 3 | [![Latest release](https://img.shields.io/crates/v/vosk.svg)](https://crates.io/crates/vosk) 4 | [![Documentation](https://docs.rs/vosk/badge.svg)](https://docs.rs/vosk) 5 | [![MIT](https://img.shields.io/github/license/Bear-03/vosk-rs)](https://github.com/Bear-03/vosk-rs) 6 | [![Build Status](https://github.com/Bear-03/vosk-rs/workflows/CI/badge.svg)](https://github.com/Bear-03/vosk-rs/actions?workflow=CI) 7 | 8 | Safe FFI bindings around the [Vosk API Speech Recognition Toolkit](https://github.com/alphacep/vosk-api). 9 | 10 | ## Usage 11 | ```rust 12 | // Simplified version of examples/read_wav.rs 13 | 14 | // Normally you would not want to hardcode the audio samples 15 | let samples = vec![100, -2, 700, 30, 4, 5]; 16 | let model_path = "/path/to/model"; 17 | 18 | let model = Model::new(model_path).unwrap(); 19 | let mut recognizer = Recognizer::new(&model, 16000.0).unwrap(); 20 | 21 | recognizer.set_max_alternatives(10); 22 | recognizer.set_words(true); 23 | recognizer.set_partial_words(true); 24 | 25 | for sample in samples.chunks(100) { 26 | recognizer.accept_waveform(sample); 27 | println!("{:#?}", recognizer.partial_result()); 28 | } 29 | 30 | println!("{:#?}", recognizer.final_result().multiple().unwrap()); 31 | ``` 32 | 33 | ## Setup 34 | 35 | ### Compilation 36 | 37 | The Vosk-API libraries have to be discoverable by the rust linker. Download the zip file containing the dynamic libraries for your platform [here](https://github.com/alphacep/vosk-api/releases). For iOS development you have to use static libraries. Get the static libraries from the [vosk-api][vosk-api-ios] team. 
38 | 
39 | #### Using dynamic libraries
40 | Do either of the following:
41 | 
42 | - **Recommended:** Create a [build script][build-script-explanation] and provide cargo with the path to the libraries
43 |   with `cargo:rustc-link-search` or `cargo:rustc-link-lib` (see the sketch at the end of this README).
44 | - Use the [`RUSTFLAGS` environment variable][rust-env-variables] to provide the path to the libraries like so:
45 |   `RUSTFLAGS=-L/path/to/the/libraries`
46 | - Make the vosk library accessible system or user-wide:
47 |     - Windows: Move the libraries to a directory in your `PATH` environment variable.
48 |     - Linux: Move them to `/usr/local/lib`, `/usr/lib` or set the `LIBRARY_PATH` environment variable to the directory containing the libraries.
49 | 
50 | Although the approaches are equivalent, using a build script is more convenient because it does not require
51 | the developer to remember a terminal command or change anything outside the project scope.
52 | 
53 | #### Using static libraries (macOS-only, targeting iOS)
54 | 
55 | - [Extract](https://llvm.org/docs/CommandGuide/llvm-lipo.html) the correct non-fat file (also called a thin file) from the static fat file (libvosk.a) for each architecture you would like to support.
56 | - [Mark your crate type as](https://doc.rust-lang.org/cargo/reference/cargo-targets.html#the-crate-type-field) `staticlib`.
57 | - Create a [build script][build-script-explanation] and provide cargo with the path to the libraries with `cargo:rustc-link-search=` and `cargo:rustc-link-lib=static=`.
58 | 
59 | ##### Troubleshooting
60 | In real-world scenarios, one will use Rust to cross-compile a library (e.g. for Android and iOS). Therefore, we need both `cdylib` and `staticlib` as crate types. If you compile as usual with cargo build (e.g. `cargo build --target aarch64-apple-ios --release`) it will not work, because cargo tries to build the dylib as well. Fortunately, since Rust 1.64, there is a new option for [rustc](https://github.com/rust-lang/cargo/issues/10083) in the stable channel. Because of this, the following will work: `cargo rustc --crate-type staticlib --lib --target aarch64-apple-ios --release`
61 | 
62 | ### Execution
63 | Executables compiled with a dynamic library must have access to the vosk library at runtime. Executables compiled with a statically linked library do not.
64 | 
65 | #### Using dynamic libraries
66 | Do either of the following:
67 | 
68 | - **Recommended:** Copy the libraries to the root of the executable
69 |   (`target/` by default). It is recommended that you use a tool such as
70 |   [cargo-make](https://sagiegurari.github.io/cargo-make/) to automate moving the libraries
71 |   from another, more practical, directory to the destination during build.
72 | - Make the vosk library accessible system or user-wide:
73 |     - Windows: Move the libraries to a directory in your `PATH` environment variable.
74 |     - Linux: Move them to `/usr/local/lib`, `/usr/lib` or set the `LD_LIBRARY_PATH` environment variable to the directory containing the libraries. Note: `LD_LIBRARY_PATH` is not the same as `LIBRARY_PATH` mentioned in the compilation step.
75 | 
76 | 
77 | #### Using static libraries (iOS-only)
78 | 
79 | - Add the compiled .a library (or libraries if you would like to support more than one architecture) to your iOS project
80 | - Set `Enable Bitcode` to **no** for your target
81 | - Add the `Accelerate Framework` from the iOS SDK to your project
82 | - Depending on your library and use case, you have to write some C -> Objective-C -> Swift glue code. 
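For reference, the build-script route recommended in the compilation section might look like the following minimal sketch. The `libs/` directory is an assumption; point it at wherever you unpacked the libraries:

```rust
// build.rs (at the crate root) — a sketch, not an official setup.
fn main() {
    // Hypothetical layout: <crate root>/libs contains the vosk library.
    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
    println!("cargo:rustc-link-search={manifest_dir}/libs");
    // Linking against `vosk` itself is already declared by the vosk-sys
    // bindings (`#[link(name = "vosk")]`), so a rustc-link-lib line is
    // usually only needed for the static iOS setup:
    // println!("cargo:rustc-link-lib=static=vosk");
}
```

This is equivalent to passing `RUSTFLAGS=-L/path/to/the/libraries`, but it lives inside the project scope.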
83 | 84 | [build-script-explanation]: https://doc.rust-lang.org/cargo/reference/build-scripts.html 85 | [rust-env-variables]: https://doc.rust-lang.org/cargo/reference/environment-variables.html 86 | [vosk-api-ios]: https://alphacephei.com/vosk/install#ios-build 87 | -------------------------------------------------------------------------------- /crates/vosk-sys/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.2.0 2 | * Add support for Batch recognition ([PR](https://github.com/Bear-03/vosk-rs/pull/8)). 3 | 4 | # 0.1.1 5 | * Documentation fixes. 6 | 7 | # 0.1.0 8 | * First release. -------------------------------------------------------------------------------- /crates/vosk-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "vosk-sys" 3 | version = "0.2.0" 4 | edition = "2021" 5 | authors = ["Bear_03"] 6 | description = "Raw FFI bindings around the Vosk API Speech Recognition Toolkit" 7 | license = "MIT" 8 | repository = "https://github.com/Bear-03/vosk-rs" 9 | keywords = ["speech", "speech-to-text", "stt"] 10 | categories = ["api-bindings", "multimedia::audio"] 11 | 12 | [dependencies] 13 | -------------------------------------------------------------------------------- /crates/vosk-sys/README.md: -------------------------------------------------------------------------------- 1 | # Vosk-sys 2 | 3 | [![Latest release](https://img.shields.io/crates/v/vosk-sys.svg)](https://crates.io/crates/vosk-sys) 4 | [![Documentation](https://docs.rs/vosk-sys/badge.svg)](https://docs.rs/vosk-sys) 5 | [![MIT](https://img.shields.io/github/license/Bear-03/vosk-rs)](https://github.com/Bear-03/vosk-rs) 6 | [![Build Status](https://github.com/Bear-03/vosk-rs/workflows/CI/badge.svg)](https://github.com/Bear-03/vosk-rs/actions?workflow=CI) 7 | 8 | Raw FFI bindings around the [Vosk API Speech Recognition Toolkit](https://github.com/alphacep/vosk-api), 9 | autogenerated via [rust-bindgen](https://github.com/rust-lang/rust-bindgen). 10 | 11 | ## Setup and usage 12 | 13 | This crate needs the same setup as its safe counterpart. Read the steps [here](../../README.md). 
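Since the bindings are raw, every call is `unsafe` and results come back as JSON in C strings. A minimal end-to-end sketch using only functions declared in `src/lib.rs` (the model path and the silent samples are placeholders):

```rust
use std::ffi::{CStr, CString};
use vosk_sys::*;

fn main() {
    let model_path = CString::new("/path/to/model").unwrap();
    unsafe {
        let model = vosk_model_new(model_path.as_ptr());
        assert!(!model.is_null(), "the model could not be loaded");
        let recognizer = vosk_recognizer_new(model, 16000.0);

        // Feed PCM 16-bit mono audio; zeros here as stand-in data.
        let samples = vec![0i16; 16000];
        vosk_recognizer_accept_waveform_s(recognizer, samples.as_ptr(), samples.len() as i32);

        // The returned JSON string is owned by the recognizer; copy it out
        // before freeing.
        let result = CStr::from_ptr(vosk_recognizer_final_result(recognizer));
        println!("{}", result.to_string_lossy());

        vosk_recognizer_free(recognizer);
        vosk_model_free(model);
    }
}
```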
14 | 15 | -------------------------------------------------------------------------------- /crates/vosk-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | /* automatically generated by rust-bindgen 0.60.1 */ 2 | 3 | #![allow(non_snake_case)] 4 | #![allow(non_camel_case_types)] 5 | #![allow(non_upper_case_globals)] 6 | 7 | #[repr(C)] 8 | #[derive(Debug, Copy, Clone)] 9 | pub struct VoskModel { 10 | _unused: [u8; 0], 11 | } 12 | 13 | #[repr(C)] 14 | #[derive(Debug, Copy, Clone)] 15 | pub struct VoskSpkModel { 16 | _unused: [u8; 0], 17 | } 18 | 19 | #[repr(C)] 20 | #[derive(Debug, Copy, Clone)] 21 | pub struct VoskRecognizer { 22 | _unused: [u8; 0], 23 | } 24 | 25 | #[repr(C)] 26 | #[derive(Debug, Copy, Clone)] 27 | pub struct VoskBatchModel { 28 | _unused: [u8; 0], 29 | } 30 | 31 | #[repr(C)] 32 | #[derive(Debug, Copy, Clone)] 33 | pub struct VoskBatchRecognizer { 34 | _unused: [u8; 0], 35 | } 36 | 37 | #[cfg_attr(not(target_os = "windows"), link(name = "vosk"))] 38 | #[cfg_attr(target_os = "windows", link(name = "libvosk"))] 39 | extern "C" { 40 | #[doc = " Loads model data from the file and returns the model object"] 41 | #[doc = ""] 42 | #[doc = " @param model_path: the path of the model on the filesystem"] 43 | #[doc = " @returns model object or NULL if problem occured"] 44 | pub fn vosk_model_new(model_path: *const ::std::os::raw::c_char) -> *mut VoskModel; 45 | 46 | #[doc = " Releases the model memory"] 47 | #[doc = ""] 48 | #[doc = " The model object is reference-counted so if some recognizer"] 49 | #[doc = " depends on this model, model might still stay alive. When"] 50 | #[doc = " last recognizer is released, model will be released too."] 51 | pub fn vosk_model_free(model: *mut VoskModel); 52 | 53 | #[doc = " Check if a word can be recognized by the model"] 54 | #[doc = " @param word: the word"] 55 | #[doc = " @returns the word symbol if @param word exists inside the model"] 56 | #[doc = " or -1 otherwise."] 57 | #[doc = " Reminding that word symbol 0 is for \\"] 58 | pub fn vosk_model_find_word( 59 | model: *mut VoskModel, 60 | word: *const ::std::os::raw::c_char, 61 | ) -> ::std::os::raw::c_int; 62 | 63 | #[doc = " Loads speaker model data from the file and returns the model object"] 64 | #[doc = ""] 65 | #[doc = " @param model_path: the path of the model on the filesystem"] 66 | #[doc = " @returns model object or NULL if problem occured"] 67 | pub fn vosk_spk_model_new(model_path: *const ::std::os::raw::c_char) -> *mut VoskSpkModel; 68 | 69 | #[doc = " Releases the model memory"] 70 | #[doc = ""] 71 | #[doc = " The model object is reference-counted so if some recognizer"] 72 | #[doc = " depends on this model, model might still stay alive. When"] 73 | #[doc = " last recognizer is released, model will be released too."] 74 | pub fn vosk_spk_model_free(model: *mut VoskSpkModel); 75 | 76 | #[doc = " Creates the recognizer object"] 77 | #[doc = ""] 78 | #[doc = " The recognizers process the speech and return text using shared model data"] 79 | #[doc = " @param model VoskModel containing static data for recognizer. 
Model can be"] 80 | #[doc = " shared across recognizers, even running in different threads."] 81 | #[doc = " @param sample_rate The sample rate of the audio you going to feed into the recognizer."] 82 | #[doc = " Make sure this rate matches the audio content, it is a common"] 83 | #[doc = " issue causing accuracy problems."] 84 | #[doc = " @returns recognizer object or NULL if problem occured"] 85 | pub fn vosk_recognizer_new(model: *mut VoskModel, sample_rate: f32) -> *mut VoskRecognizer; 86 | 87 | #[doc = " Creates the recognizer object with speaker recognition"] 88 | #[doc = ""] 89 | #[doc = " With the speaker recognition mode the recognizer not just recognize"] 90 | #[doc = " text but also return speaker vectors one can use for speaker identification"] 91 | #[doc = ""] 92 | #[doc = " @param model VoskModel containing static data for recognizer. Model can be"] 93 | #[doc = " shared across recognizers, even running in different threads."] 94 | #[doc = " @param sample_rate The sample rate of the audio you going to feed into the recognizer."] 95 | #[doc = " Make sure this rate matches the audio content, it is a common"] 96 | #[doc = " issue causing accuracy problems."] 97 | #[doc = " @param spk_model speaker model for speaker identification"] 98 | #[doc = " @returns recognizer object or NULL if problem occured"] 99 | pub fn vosk_recognizer_new_spk( 100 | model: *mut VoskModel, 101 | sample_rate: f32, 102 | spk_model: *mut VoskSpkModel, 103 | ) -> *mut VoskRecognizer; 104 | 105 | #[doc = " Creates the recognizer object with the phrase list"] 106 | #[doc = ""] 107 | #[doc = " Sometimes when you want to improve recognition accuracy and when you don't need"] 108 | #[doc = " to recognize large vocabulary you can specify a list of phrases to recognize. This"] 109 | #[doc = " will improve recognizer speed and accuracy but might return \\[unk\\] if user said"] 110 | #[doc = " something different."] 111 | #[doc = ""] 112 | #[doc = " Only recognizers with lookahead models support this type of quick configuration."] 113 | #[doc = " Precompiled HCLG graph models are not supported."] 114 | #[doc = ""] 115 | #[doc = " @param model VoskModel containing static data for recognizer. Model can be"] 116 | #[doc = " shared across recognizers, even running in different threads."] 117 | #[doc = " @param sample_rate The sample rate of the audio you going to feed into the recognizer."] 118 | #[doc = " Make sure this rate matches the audio content, it is a common"] 119 | #[doc = " issue causing accuracy problems."] 120 | #[doc = " @param grammar The string with the list of phrases to recognize as JSON array of strings,"] 121 | #[doc = " for example \"\\[\"one two three four five\", \"\\[unk\\]\"\\]\"."] 122 | #[doc = ""] 123 | #[doc = " @returns recognizer object or NULL if problem occured"] 124 | pub fn vosk_recognizer_new_grm( 125 | model: *mut VoskModel, 126 | sample_rate: f32, 127 | grammar: *const ::std::os::raw::c_char, 128 | ) -> *mut VoskRecognizer; 129 | 130 | #[doc = " Adds speaker model to already initialized recognizer"] 131 | #[doc = ""] 132 | #[doc = " Can add speaker recognition model to already created recognizer. 
Helps to initialize"]
133 |     #[doc = " speaker recognition for grammar-based recognizer."]
134 |     #[doc = ""]
135 |     #[doc = " @param spk_model Speaker recognition model"]
136 |     pub fn vosk_recognizer_set_spk_model(
137 |         recognizer: *mut VoskRecognizer,
138 |         spk_model: *mut VoskSpkModel,
139 |     );
140 | 
141 |     #[doc = " Configures recognizer to output n-best results"]
142 |     #[doc = ""]
143 |     #[doc = "<pre>"]
144 |     #[doc = "   {"]
145 |     #[doc = "      \"alternatives\": ["]
146 |     #[doc = "          { \"text\": \"one two three four five\", \"confidence\": 0.97 },"]
147 |     #[doc = "          { \"text\": \"one two three for five\", \"confidence\": 0.03 },"]
148 |     #[doc = "      ]"]
149 |     #[doc = "   }"]
150 |     #[doc = " </pre>"]
151 |     #[doc = ""]
152 |     #[doc = " @param max_alternatives - maximum alternatives to return from recognition results"]
153 |     pub fn vosk_recognizer_set_max_alternatives(
154 |         recognizer: *mut VoskRecognizer,
155 |         max_alternatives: ::std::os::raw::c_int,
156 |     );
157 | 
158 |     #[doc = " Enables words with times in the output"]
159 |     #[doc = ""]
160 |     #[doc = "<pre>"]
161 |     #[doc = "   \"result\" : [{"]
162 |     #[doc = "       \"conf\" : 1.000000,"]
163 |     #[doc = "       \"end\" : 1.110000,"]
164 |     #[doc = "       \"start\" : 0.870000,"]
165 |     #[doc = "       \"word\" : \"what\""]
166 |     #[doc = "     }, {"]
167 |     #[doc = "       \"conf\" : 1.000000,"]
168 |     #[doc = "       \"end\" : 1.530000,"]
169 |     #[doc = "       \"start\" : 1.110000,"]
170 |     #[doc = "       \"word\" : \"zero\""]
171 |     #[doc = "     }, {"]
172 |     #[doc = "       \"conf\" : 1.000000,"]
173 |     #[doc = "       \"end\" : 1.950000,"]
174 |     #[doc = "       \"start\" : 1.530000,"]
175 |     #[doc = "       \"word\" : \"zero\""]
176 |     #[doc = "     }, {"]
177 |     #[doc = "       \"conf\" : 1.000000,"]
178 |     #[doc = "       \"end\" : 2.340000,"]
179 |     #[doc = "       \"start\" : 1.950000,"]
180 |     #[doc = "       \"word\" : \"zero\""]
181 |     #[doc = "     }, {"]
182 |     #[doc = "       \"conf\" : 1.000000,"]
183 |     #[doc = "       \"end\" : 2.610000,"]
184 |     #[doc = "       \"start\" : 2.340000,"]
185 |     #[doc = "       \"word\" : \"one\""]
186 |     #[doc = "     }],"]
187 |     #[doc = " </pre>
"] 188 | #[doc = ""] 189 | #[doc = " @param words - boolean value"] 190 | pub fn vosk_recognizer_set_words(recognizer: *mut VoskRecognizer, words: ::std::os::raw::c_int); 191 | 192 | #[doc = " Like above return words and confidences in partial results"] 193 | #[doc = ""] 194 | #[doc = " @param partial_words - boolean value"] 195 | pub fn vosk_recognizer_set_partial_words( 196 | recognizer: *mut VoskRecognizer, 197 | partial_words: ::std::os::raw::c_int, 198 | ); 199 | 200 | #[doc = " Set NLSML output"] 201 | #[doc = " @param nlsml - boolean value"] 202 | pub fn vosk_recognizer_set_nlsml(recognizer: *mut VoskRecognizer, nlsml: ::std::os::raw::c_int); 203 | 204 | #[doc = " Accept voice data"] 205 | #[doc = ""] 206 | #[doc = " accept and process new chunk of voice data"] 207 | #[doc = ""] 208 | #[doc = " @param data - audio data in PCM 16-bit mono format"] 209 | #[doc = " @param length - length of the audio data"] 210 | #[doc = " @returns 1 if silence is occured and you can retrieve a new utterance with result method"] 211 | #[doc = " 0 if decoding continues"] 212 | #[doc = " -1 if exception occured"] 213 | pub fn vosk_recognizer_accept_waveform( 214 | recognizer: *mut VoskRecognizer, 215 | data: *const ::std::os::raw::c_char, 216 | length: ::std::os::raw::c_int, 217 | ) -> ::std::os::raw::c_int; 218 | 219 | #[doc = " Same as above but the version with the short data for language bindings where you have"] 220 | #[doc = " audio as array of shorts"] 221 | pub fn vosk_recognizer_accept_waveform_s( 222 | recognizer: *mut VoskRecognizer, 223 | data: *const ::std::os::raw::c_short, 224 | length: ::std::os::raw::c_int, 225 | ) -> ::std::os::raw::c_int; 226 | 227 | #[doc = " Same as above but the version with the float data for language bindings where you have"] 228 | #[doc = " audio as array of floats"] 229 | pub fn vosk_recognizer_accept_waveform_f( 230 | recognizer: *mut VoskRecognizer, 231 | data: *const f32, 232 | length: ::std::os::raw::c_int, 233 | ) -> ::std::os::raw::c_int; 234 | 235 | #[doc = " Returns speech recognition result"] 236 | #[doc = ""] 237 | #[doc = " @returns the result in JSON format which contains decoded line, decoded"] 238 | #[doc = " words, times in seconds and confidences. You can parse this result"] 239 | #[doc = " with any json parser"] 240 | #[doc = ""] 241 | #[doc = "
"]
242 |     #[doc = "  {"]
243 |     #[doc = "    \"text\" : \"what zero zero zero one\""]
244 |     #[doc = "  }"]
245 |     #[doc = " </pre>"]
246 |     #[doc = ""]
247 |     #[doc = " If alternatives enabled it returns result with alternatives, see also vosk_recognizer_set_alternatives()."]
248 |     #[doc = ""]
249 |     #[doc = " If word times enabled returns word time, see also vosk_recognizer_set_word_times()."]
250 |     pub fn vosk_recognizer_result(recognizer: *mut VoskRecognizer)
251 |         -> *const ::std::os::raw::c_char;
252 | 
253 |     #[doc = " Returns partial speech recognition"]
254 |     #[doc = ""]
255 |     #[doc = " @returns partial speech recognition text which is not yet finalized."]
256 |     #[doc = " result may change as recognizer process more data."]
257 |     #[doc = ""]
258 |     #[doc = "<pre>"]
259 |     #[doc = " {"]
260 |     #[doc = "    \"partial\" : \"cyril one eight zero\""]
261 |     #[doc = " }"]
262 |     #[doc = " </pre>
"] 263 | pub fn vosk_recognizer_partial_result( 264 | recognizer: *mut VoskRecognizer, 265 | ) -> *const ::std::os::raw::c_char; 266 | 267 | #[doc = " Returns speech recognition result. Same as result, but doesn't wait for silence"] 268 | #[doc = " You usually call it in the end of the stream to get final bits of audio. It"] 269 | #[doc = " flushes the feature pipeline, so all remaining audio chunks got processed."] 270 | #[doc = ""] 271 | #[doc = " @returns speech result in JSON format."] 272 | pub fn vosk_recognizer_final_result( 273 | recognizer: *mut VoskRecognizer, 274 | ) -> *const ::std::os::raw::c_char; 275 | 276 | #[doc = " Resets the recognizer"] 277 | #[doc = ""] 278 | #[doc = " Resets current results so the recognition can continue from scratch"] 279 | pub fn vosk_recognizer_reset(recognizer: *mut VoskRecognizer); 280 | 281 | #[doc = " Releases recognizer object"] 282 | #[doc = ""] 283 | #[doc = " Underlying model is also unreferenced and if needed released"] 284 | pub fn vosk_recognizer_free(recognizer: *mut VoskRecognizer); 285 | 286 | #[doc = " Set log level for Kaldi messages"] 287 | #[doc = ""] 288 | #[doc = " @param log_level the level"] 289 | #[doc = " 0 - default value to print info and error messages but no debug"] 290 | #[doc = " less than 0 - don't print info messages"] 291 | #[doc = " greather than 0 - more verbose mode"] 292 | pub fn vosk_set_log_level(log_level: ::std::os::raw::c_int); 293 | 294 | #[doc = " Init, automatically select a CUDA device and allow multithreading."] 295 | #[doc = " Must be called once from the main thread."] 296 | #[doc = " Has no effect if HAVE_CUDA flag is not set."] 297 | pub fn vosk_gpu_init(); 298 | 299 | #[doc = " Init CUDA device in a multi-threaded environment."] 300 | #[doc = " Must be called for each thread."] 301 | #[doc = " Has no effect if HAVE_CUDA flag is not set."] 302 | pub fn vosk_gpu_thread_init(); 303 | 304 | #[doc = " Creates the batch recognizer object"] 305 | #[doc = ""] 306 | #[doc = " @returns model object or NULL if problem occured"] 307 | pub fn vosk_batch_model_new(model_path: *const ::std::os::raw::c_char) -> *mut VoskBatchModel; 308 | 309 | #[doc = " Releases batch model object"] 310 | pub fn vosk_batch_model_free(model: *mut VoskBatchModel); 311 | 312 | #[doc = " Wait for the processing"] 313 | pub fn vosk_batch_model_wait(model: *mut VoskBatchModel); 314 | 315 | #[doc = " Creates batch recognizer object"] 316 | #[doc = " @returns recognizer object or NULL if problem occured"] 317 | pub fn vosk_batch_recognizer_new( 318 | model: *mut VoskBatchModel, 319 | sample_rate: f32, 320 | ) -> *mut VoskBatchRecognizer; 321 | 322 | #[doc = " Releases batch recognizer object"] 323 | pub fn vosk_batch_recognizer_free(recognizer: *mut VoskBatchRecognizer); 324 | 325 | #[doc = " Accept batch voice data"] 326 | pub fn vosk_batch_recognizer_accept_waveform( 327 | recognizer: *mut VoskBatchRecognizer, 328 | data: *const ::std::os::raw::c_char, 329 | length: ::std::os::raw::c_int, 330 | ); 331 | 332 | #[doc = " Set NLSML output"] 333 | #[doc = " @param nlsml - boolean value"] 334 | pub fn vosk_batch_recognizer_set_nlsml( 335 | recognizer: *mut VoskBatchRecognizer, 336 | nlsml: ::std::os::raw::c_int, 337 | ); 338 | 339 | #[doc = " Closes the stream"] 340 | pub fn vosk_batch_recognizer_finish_stream(recognizer: *mut VoskBatchRecognizer); 341 | 342 | #[doc = " Return results"] 343 | pub fn vosk_batch_recognizer_front_result( 344 | recognizer: *mut VoskBatchRecognizer, 345 | ) -> *const ::std::os::raw::c_char; 346 | 347 | #[doc = 
" Release and free first retrieved result"] 348 | pub fn vosk_batch_recognizer_pop(recognizer: *mut VoskBatchRecognizer); 349 | 350 | #[doc = " Get amount of pending chunks for more intelligent waiting"] 351 | pub fn vosk_batch_recognizer_get_pending_chunks( 352 | recognizer: *mut VoskBatchRecognizer, 353 | ) -> ::std::os::raw::c_int; 354 | } 355 | -------------------------------------------------------------------------------- /crates/vosk/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ../../CHANGELOG.md -------------------------------------------------------------------------------- /crates/vosk/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "vosk" 3 | version = "0.3.1" 4 | edition = "2021" 5 | authors = ["Bear_03"] 6 | description = "Safe wrapper around the Vosk API Speech Recognition Toolkit" 7 | license = "MIT" 8 | repository = "https://github.com/Bear-03/vosk-rs" 9 | keywords = ["speech", "speech-to-text", "stt"] 10 | categories = ["api-bindings", "multimedia::audio"] 11 | 12 | [dependencies] 13 | vosk-sys = { path = "../vosk-sys", version = "0.2" } 14 | serde_json = "1.0" 15 | serde = { version = "1.0", features = ["derive"] } 16 | thiserror = "1.0" 17 | 18 | [dev-dependencies] 19 | # Dependencies for examples 20 | cpal = "0.15" 21 | dasp = "0.11" 22 | hound = "3.5" 23 | 24 | [features] 25 | batch = [] 26 | 27 | [package.metadata.docs.rs] 28 | all-features = true 29 | cargo-args = ["-Zunstable-options", "-Zrustdoc-scrape-examples"] 30 | rustdoc-args = ["--cfg", "docsrs"] 31 | -------------------------------------------------------------------------------- /crates/vosk/README.md: -------------------------------------------------------------------------------- 1 | ../../README.md -------------------------------------------------------------------------------- /crates/vosk/examples/grammar.rs: -------------------------------------------------------------------------------- 1 | //! Run with: 2 | //! cargo run --example grammar 3 | //! e.g. "cargo run --example grammar /home/user/stt/model /home/user/stt/test.wav" 4 | //! (The WAV file must have signed 16-bit samples) 5 | //! 6 | //! Read the "Setup" section in the README to know how to link the vosk dynamic 7 | //! libaries to the examples 8 | 9 | use std::env; 10 | 11 | use hound::WavReader; 12 | use vosk::{DecodingState, Model, Recognizer}; 13 | 14 | fn main() { 15 | let mut args = env::args(); 16 | args.next(); 17 | 18 | let model_path = args.next().expect("A model path was not provided"); 19 | let wav_path = args 20 | .next() 21 | .expect("A path for the WAV file to be read was not provided"); 22 | 23 | let mut reader = WavReader::open(wav_path).expect("Could not create the WAV reader"); 24 | let samples = reader 25 | .samples() 26 | .collect::>>() 27 | .expect("Could not read WAV file"); 28 | 29 | let model = Model::new(model_path).expect("Could not create the model"); 30 | 31 | let mut recognizer = Recognizer::new_with_grammar( 32 | &model, 33 | reader.spec().sample_rate as f32, 34 | // Provide a list of phrases to be recognized. 35 | // 36 | // If "[unk]" is added, it will be the fallback for any word that could not be recognized. 37 | // Otherwise, the best match will be used in the result, even if it is most likely 38 | // incorrect. 
39 |         //
40 |         // Note that the words in a phrase can still be recognized separately
41 |         &["one two three four five six seven eight nine zero", "[unk]"],
42 |     )
43 |     .expect("Could not create the recognizer");
44 | 
45 |     for sample in samples.chunks(4000) {
46 |         let state = recognizer.accept_waveform(sample).unwrap();
47 |         match state {
48 |             DecodingState::Finalized => {
49 |                 println!("{:#?}", recognizer.result().single().unwrap());
50 |             }
51 |             DecodingState::Running => {
52 |                 println!("{:#?}", recognizer.partial_result());
53 |             }
54 |             DecodingState::Failed => {
55 |                 eprintln!("an error occurred")
56 |             }
57 |         }
58 |     }
59 | 
60 |     println!("{:#?}", recognizer.final_result().single().unwrap());
61 | }
62 | 
--------------------------------------------------------------------------------
/crates/vosk/examples/microphone.rs:
--------------------------------------------------------------------------------
1 | //! Run with:
2 | //! cargo run --example microphone <model path> <duration to record in seconds>
3 | //! e.g. "cargo run --example microphone /home/user/stt/model 10"
4 | //!
5 | //! Read the "Setup" section in the README to know how to link the vosk dynamic
6 | //! libraries to the examples
7 | 
8 | use std::{
9 |     env,
10 |     sync::{Arc, Mutex},
11 |     time::Duration,
12 | };
13 | 
14 | use cpal::{
15 |     traits::{DeviceTrait, HostTrait, StreamTrait},
16 |     ChannelCount, SampleFormat,
17 | };
18 | use dasp::{sample::ToSample, Sample};
19 | use vosk::{DecodingState, Model, Recognizer};
20 | 
21 | fn main() {
22 |     let mut args = env::args();
23 |     args.next();
24 | 
25 |     let model_path = args.next().expect("A model path was not provided");
26 |     let record_duration = Duration::from_secs(
27 |         args.next()
28 |             .expect("A recording duration was not provided")
29 |             .parse()
30 |             .expect("Invalid recording duration"),
31 |     );
32 | 
33 |     let audio_input_device = cpal::default_host()
34 |         .default_input_device()
35 |         .expect("No input device connected");
36 | 
37 |     let config = audio_input_device
38 |         .default_input_config()
39 |         .expect("Failed to load default input config");
40 |     let channels = config.channels();
41 | 
42 |     let model = Model::new(model_path).expect("Could not create the model");
43 |     let mut recognizer = Recognizer::new(&model, config.sample_rate().0 as f32)
44 |         .expect("Could not create the Recognizer");
45 | 
46 |     recognizer.set_max_alternatives(10);
47 |     recognizer.set_words(true);
48 |     recognizer.set_partial_words(true);
49 | 
50 |     let recognizer = Arc::new(Mutex::new(recognizer));
51 | 
52 |     let err_fn = move |err| {
53 |         eprintln!("an error occurred on stream: {}", err);
54 |     };
55 | 
56 |     let recognizer_clone = recognizer.clone();
57 |     let stream = match config.sample_format() {
58 |         SampleFormat::I8 => audio_input_device.build_input_stream(
59 |             &config.into(),
60 |             move |data: &[i8], _| recognize(&mut recognizer_clone.lock().unwrap(), data, channels),
61 |             err_fn,
62 |             None,
63 |         ),
64 |         SampleFormat::I16 => audio_input_device.build_input_stream(
65 |             &config.into(),
66 |             move |data: &[i16], _| recognize(&mut recognizer_clone.lock().unwrap(), data, channels),
67 |             err_fn,
68 |             None,
69 |         ),
70 |         SampleFormat::I32 => audio_input_device.build_input_stream(
71 |             &config.into(),
72 |             move |data: &[i32], _| recognize(&mut recognizer_clone.lock().unwrap(), data, channels),
73 |             err_fn,
74 |             None,
75 |         ),
76 |         SampleFormat::F32 => audio_input_device.build_input_stream(
77 |             &config.into(),
78 |             move |data: &[f32], _| recognize(&mut recognizer_clone.lock().unwrap(), data, channels),
79 |             err_fn,
80 |             None,
81 |         ),
82 |         sample_format => 
panic!("Unsupported sample format '{sample_format}'"), 83 | } 84 | .expect("Could not build stream"); 85 | 86 | stream.play().expect("Could not play stream"); 87 | println!("Recording..."); 88 | 89 | std::thread::sleep(record_duration); 90 | drop(stream); 91 | 92 | println!("{:#?}", recognizer.lock().unwrap().final_result()); 93 | } 94 | 95 | fn recognize>( 96 | recognizer: &mut Recognizer, 97 | data: &[T], 98 | channels: ChannelCount, 99 | ) { 100 | let data: Vec = data.iter().map(|v| v.to_sample()).collect(); 101 | let data = if channels != 1 { 102 | stereo_to_mono(&data) 103 | } else { 104 | data 105 | }; 106 | 107 | let state = recognizer.accept_waveform(&data).unwrap(); 108 | match state { 109 | DecodingState::Running => { 110 | println!("partial: {:#?}", recognizer.partial_result()); 111 | } 112 | DecodingState::Finalized => { 113 | // Result will always be multiple because we called set_max_alternatives 114 | println!("result: {:#?}", recognizer.result().multiple().unwrap()); 115 | } 116 | DecodingState::Failed => eprintln!("error"), 117 | } 118 | } 119 | 120 | pub fn stereo_to_mono(input_data: &[i16]) -> Vec { 121 | let mut result = Vec::with_capacity(input_data.len() / 2); 122 | result.extend( 123 | input_data 124 | .chunks_exact(2) 125 | .map(|chunk| chunk[0] / 2 + chunk[1] / 2), 126 | ); 127 | 128 | result 129 | } 130 | -------------------------------------------------------------------------------- /crates/vosk/examples/read_wav.rs: -------------------------------------------------------------------------------- 1 | //! Run with: 2 | //! cargo run --example read_wav 3 | //! e.g. "cargo run --example read_wav /home/user/stt/model /home/user/stt/test.wav" 4 | //! (The WAV file must have signed 16-bit samples) 5 | //! 6 | //! Read the "Setup" section in the README to know how to link the vosk dynamic 7 | //! libaries to the examples 8 | 9 | use std::env; 10 | 11 | use hound::WavReader; 12 | use vosk::{Model, Recognizer}; 13 | 14 | fn main() { 15 | let mut args = env::args(); 16 | args.next(); 17 | 18 | let model_path = args.next().expect("A model path was not provided"); 19 | let wav_path = args 20 | .next() 21 | .expect("A path for the wav file to be read was not provided"); 22 | 23 | let mut reader = WavReader::open(wav_path).expect("Could not create the WAV reader"); 24 | let samples = reader 25 | .samples() 26 | .collect::>>() 27 | .expect("Could not read WAV file"); 28 | 29 | let model = Model::new(model_path).expect("Could not create the model"); 30 | let mut recognizer = Recognizer::new(&model, reader.spec().sample_rate as f32) 31 | .expect("Could not create the recognizer"); 32 | 33 | recognizer.set_max_alternatives(10); 34 | recognizer.set_words(true); 35 | recognizer.set_partial_words(true); 36 | 37 | for sample in samples.chunks(100) { 38 | recognizer.accept_waveform(sample).unwrap(); 39 | println!("{:#?}", recognizer.partial_result()); 40 | } 41 | 42 | println!("{:#?}", recognizer.final_result().multiple().unwrap()); 43 | } 44 | -------------------------------------------------------------------------------- /crates/vosk/examples/speaker_model.rs: -------------------------------------------------------------------------------- 1 | //! Run with: 2 | //! cargo run --example read_wav 3 | //! e.g. "cargo run --example speaker_model /home/user/stt/model /home/user/stt/speaker_model /home/user/stt/test.wav" 4 | //! (The WAV file must have signed 16-bit sample) 5 | //! 6 | //! Read the "Setup" section in the README to know how to link the vosk dynamic 7 | //! 
libraries to the examples
8 | 
9 | use std::env;
10 | 
11 | use hound::WavReader;
12 | use vosk::{Model, Recognizer, SpeakerModel};
13 | 
14 | fn main() {
15 |     let mut args = env::args();
16 |     args.next();
17 | 
18 |     let model_path = args.next().expect("A model path was not provided");
19 |     let speaker_model_path = args.next().expect("A speaker model path was not provided");
20 |     let wav_path = args
21 |         .next()
22 |         .expect("A path for the WAV file to be read was not provided");
23 | 
24 |     let mut reader = WavReader::open(wav_path).expect("Could not create the WAV reader");
25 |     let samples = reader
26 |         .samples()
27 |         .collect::<hound::Result<Vec<i16>>>()
28 |         .expect("Could not read WAV file");
29 | 
30 |     let model = Model::new(model_path).expect("Could not create the model");
31 |     let spk_model =
32 |         SpeakerModel::new(speaker_model_path).expect("Could not create the speaker model");
33 |     let mut recognizer =
34 |         Recognizer::new_with_speaker(&model, reader.spec().sample_rate as f32, &spk_model)
35 |             .expect("Could not create the recognizer");
36 | 
37 |     // Alternatives cannot be enabled as the Alternative objects do not contain the speaker info
38 |     // recognizer.set_max_alternatives(10);
39 | 
40 |     // Words will remain disabled so the speaker data is more visible, though they could be enabled
41 |     // recognizer.set_words(true);
42 |     // recognizer.set_partial_words(true);
43 | 
44 |     for sample in samples.chunks(100) {
45 |         recognizer.accept_waveform(sample).unwrap();
46 |         println!("{:#?}", recognizer.partial_result());
47 |     }
48 | 
49 |     println!("{:#?}", recognizer.final_result().single().unwrap());
50 | }
51 | 
--------------------------------------------------------------------------------
/crates/vosk/src/gpu.rs:
--------------------------------------------------------------------------------
1 | /// Init, automatically select a CUDA device and allow multithreading.
2 | /// Must be called once from the main thread.
3 | pub fn gpu_init() {
4 |     unsafe { vosk_sys::vosk_gpu_init() }
5 | }
6 | 
7 | /// Init CUDA device in a multi-threaded environment.
8 | /// Must be called for each thread.
9 | pub fn gpu_thread_init() {
10 |     unsafe { vosk_sys::vosk_gpu_thread_init() }
11 | }
12 | 
--------------------------------------------------------------------------------
/crates/vosk/src/lib.rs:
--------------------------------------------------------------------------------
1 | #![deny(missing_docs)]
2 | #![cfg_attr(docsrs, feature(doc_auto_cfg))]
3 | 
4 | //! Safe FFI bindings around the [Vosk API Speech Recognition Toolkit](https://github.com/alphacep/vosk-api).
5 | //!
6 | //! **Basic usage:**
7 | //! * Create a [`Model`]
8 | //! * Create a [`Recognizer`] with that model
9 | //! * Feed audio to the recognizer with [`Recognizer::accept_waveform`]
10 | //! * Get the processed result with [`Recognizer::result`],
11 | //!   [`Recognizer::partial_result`] or [`Recognizer::final_result`]
12 | 
13 | #[cfg(feature = "batch")]
14 | mod gpu;
15 | mod log;
16 | mod models;
17 | mod recognition;
18 | 
19 | pub use crate::{log::*, models::*, recognition::*};
20 | #[cfg(feature = "batch")]
21 | pub use gpu::*;
22 | 
--------------------------------------------------------------------------------
/crates/vosk/src/log.rs:
--------------------------------------------------------------------------------
1 | use std::os::raw::c_int;
2 | use vosk_sys::*;
3 | 
4 | #[derive(Debug, Default, Clone, Copy)]
5 | /// Log level for Kaldi messages. 
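///
/// A minimal usage sketch, picking a quieter level at startup:
///
/// ```no_run
/// vosk::set_log_level(vosk::LogLevel::Error);
/// ```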
6 | pub enum LogLevel {
7 |     /// Print Error, Warn, and Info messages (default).
8 |     #[default]
9 |     Info,
10 | 
11 |     /// Print Error and Warn messages.
12 |     Warn,
13 | 
14 |     /// Only print Error messages.
15 |     Error,
16 | }
17 | 
18 | impl LogLevel {
19 |     pub(self) fn to_c_int(self) -> c_int {
20 |         match self {
21 |             Self::Info => 0,
22 |             Self::Warn => -1,
23 |             Self::Error => -2,
24 |         }
25 |     }
26 | }
27 | 
28 | /// Set log level for Kaldi messages.
29 | ///
30 | /// Default: [`LogLevel::Info`].
31 | pub fn set_log_level(log_level: LogLevel) {
32 |     unsafe { vosk_set_log_level(log_level.to_c_int()) }
33 | }
34 | 
--------------------------------------------------------------------------------
/crates/vosk/src/models/batch.rs:
--------------------------------------------------------------------------------
1 | use std::{ffi::CString, ptr::NonNull};
2 | use vosk_sys::*;
3 | 
4 | /// The same as [`Model`], but uses
5 | /// a CUDA enabled Nvidia GPU and dynamic batching to enable higher throughput.
6 | ///
7 | /// [`Model`]: super::sequential::Model
8 | pub struct BatchModel(pub(crate) NonNull<VoskBatchModel>);
9 | 
10 | impl BatchModel {
11 |     /// Loads model data from the file and returns the model object, or [`None`]
12 |     /// if a problem occured.
13 |     ///
14 |     /// * `model_path` - the path to the model directory.
15 |     #[must_use]
16 |     pub fn new(model_path: impl Into<String>) -> Option<Self> {
17 |         let model_path_c = CString::new(model_path.into()).ok()?;
18 |         let model_ptr = unsafe { vosk_batch_model_new(model_path_c.as_ptr()) };
19 | 
20 |         Some(Self(NonNull::new(model_ptr)?))
21 |     }
22 | 
23 |     /// Waits for inferencing to finish
24 |     pub fn wait(&self) {
25 |         unsafe { vosk_batch_model_wait(self.0.as_ptr()) };
26 |     }
27 | }
28 | 
29 | impl Drop for BatchModel {
30 |     fn drop(&mut self) {
31 |         unsafe { vosk_batch_model_free(self.0.as_ptr()) }
32 |     }
33 | }
34 | 
35 | unsafe impl Send for BatchModel {}
36 | unsafe impl Sync for BatchModel {}
37 | 
--------------------------------------------------------------------------------
/crates/vosk/src/models/mod.rs:
--------------------------------------------------------------------------------
1 | #[cfg(feature = "batch")]
2 | mod batch;
3 | mod sequential;
4 | 
5 | #[cfg(feature = "batch")]
6 | pub use batch::BatchModel;
7 | pub use sequential::{Model, SpeakerModel};
8 | 
--------------------------------------------------------------------------------
/crates/vosk/src/models/sequential.rs:
--------------------------------------------------------------------------------
1 | use std::{ffi::CString, ptr::NonNull};
2 | use vosk_sys::*;
3 | 
4 | // SAFETY:
5 | // All models can be safely shared across threads
6 | // They hold static data and they won't be mutated by Vosk, so it is safe
7 | // to pass their pointer to multiple Recognizers even from immutable references
8 | // https://github.com/alphacep/vosk-api/blob/a7bc5a22d411e22bebf4df1cc5554b473c7456d4/src/vosk_api.h
9 | 
10 | /// Model that stores all the data required for recognition.
11 | pub struct Model(pub(crate) NonNull<VoskModel>);
12 | 
13 | impl Model {
14 |     /// Loads model data from the file and returns the model object, or [`None`]
15 |     /// if a problem occured.
16 |     ///
17 |     /// * `model_path` - the path to the model directory.
18 |     #[must_use]
19 |     pub fn new(model_path: impl Into<String>) -> Option<Self> {
20 |         let model_path_c = CString::new(model_path.into()).ok()?;
21 |         let model_ptr = unsafe { vosk_model_new(model_path_c.as_ptr()) };
22 | 
23 |         Some(Self(NonNull::new(model_ptr)?))
24 |     }
25 | 
26 |     /// Check if a word can be recognized by the model. 
27 |     /// If it is, this returns Some with the index of the word in the model.
28 |     /// If it is not, this returns None.
29 |     ///
30 |     /// Word symbol `0` is for `<eps>`.
31 |     #[must_use]
32 |     pub fn find_word(&mut self, word: &str) -> Option<u32> {
33 |         let word_c = CString::new(word).ok()?;
34 | 
35 |         let symbol = unsafe { vosk_model_find_word(self.0.as_ptr(), word_c.as_ptr()) };
36 | 
37 |         if symbol == -1 {
38 |             None
39 |         } else {
40 |             // UNWRAP: the only possible negative symbol was -1
41 |             Some(u32::try_from(symbol).unwrap())
42 |         }
43 |     }
44 | }
45 | 
46 | impl Drop for Model {
47 |     fn drop(&mut self) {
48 |         unsafe { vosk_model_free(self.0.as_ptr()) }
49 |     }
50 | }
51 | 
52 | unsafe impl Send for Model {}
53 | unsafe impl Sync for Model {}
54 | 
55 | /// The same as [`Model`] but contains the data for speaker identification.
56 | pub struct SpeakerModel(pub(crate) NonNull<VoskSpkModel>);
57 | 
58 | impl SpeakerModel {
59 |     /// Loads speaker model data from the file and returns the model
60 |     /// object, or [`None`] if a problem occured.
61 |     ///
62 |     /// * `model_path` - the path to the model in the filesystem.
63 |     #[must_use]
64 |     pub fn new(model_path: impl Into<String>) -> Option<Self> {
65 |         let model_path_c = CString::new(model_path.into()).ok()?;
66 |         let model_ptr = unsafe { vosk_spk_model_new(model_path_c.as_ptr()) };
67 | 
68 |         Some(Self(NonNull::new(model_ptr)?))
69 |     }
70 | }
71 | 
72 | impl Drop for SpeakerModel {
73 |     fn drop(&mut self) {
74 |         unsafe { vosk_spk_model_free(self.0.as_ptr()) }
75 |     }
76 | }
77 | 
78 | unsafe impl Send for SpeakerModel {}
79 | unsafe impl Sync for SpeakerModel {}
80 | 
--------------------------------------------------------------------------------
/crates/vosk/src/recognition/batch.rs:
--------------------------------------------------------------------------------
1 | use super::{
2 |     errors::AcceptWaveformError,
3 |     results::{result_from_json_c_str, Word},
4 | };
5 | use crate::models::BatchModel;
6 | use vosk_sys::*;
7 | 
8 | use std::ptr::NonNull;
9 | 
10 | /// The main object which processes data using GPU inferencing.
11 | /// Takes audio as input and returns decoded information as words, confidences, times, and other metadata.
12 | pub struct BatchRecognizer(std::ptr::NonNull<VoskBatchRecognizer>);
13 | 
14 | impl BatchRecognizer {
15 |     /// Creates the recognizer object. Returns [`None`] if a problem occured.
16 |     ///
17 |     /// The recognizers process the speech and return text using shared model data.
18 |     ///
19 |     /// * `model` - [`BatchModel`] containing static data for recognizer. Model can be shared
20 |     /// across recognizers, even running in different threads.
21 |     ///
22 |     /// * `sample_rate` - The sample rate of the audio you are going to feed into the recognizer.
23 |     /// Make sure this rate matches the audio content, it is a common issue causing accuracy problems.
24 |     ///
25 |     /// [`BatchModel`]: crate::BatchModel
26 |     #[must_use]
27 |     pub fn new(model: &BatchModel, sample_rate: f32) -> Option<Self> {
28 |         let recognizer_ptr = unsafe { vosk_batch_recognizer_new(model.0.as_ptr(), sample_rate) };
29 |         Some(Self(NonNull::new(recognizer_ptr)?))
30 |     }
31 | 
32 |     /// Enables or disables Natural Language Semantics Markup Language (NLSML) in the output.
33 |     pub fn set_nlsml(&mut self, enable: bool) {
34 |         unsafe { vosk_batch_recognizer_set_nlsml(self.0.as_ptr(), i32::from(enable)) }
35 |     }
36 | 
37 |     /// Accept and process new chunk of voice data.
38 |     ///
39 |     /// * `data` - Audio data in PCM 16-bit mono format as an array of i8. 
40 | pub fn accept_waveform(&mut self, data: &[i8]) -> Result<(), AcceptWaveformError> { 41 | let len = data.len(); 42 | 43 | unsafe { 44 | vosk_batch_recognizer_accept_waveform( 45 | self.0.as_ptr(), 46 | data.as_ptr(), 47 | i32::try_from(len).map_err(|_| AcceptWaveformError::BufferTooLong(len))?, 48 | ) 49 | }; 50 | 51 | Ok(()) 52 | } 53 | 54 | /// Closes the stream to the model. 55 | pub fn finish_stream(&mut self) { 56 | unsafe { vosk_batch_recognizer_finish_stream(self.0.as_ptr()) }; 57 | } 58 | 59 | /// Gets the front of the result queue. 60 | pub fn front_result(&mut self) -> Word { 61 | unsafe { result_from_json_c_str(vosk_batch_recognizer_front_result(self.0.as_ptr())) } 62 | } 63 | 64 | /// Removes the front of the result queue. 65 | pub fn pop(&mut self) { 66 | unsafe { vosk_batch_recognizer_pop(self.0.as_ptr()) } 67 | } 68 | 69 | /// Gets the number of chunks that have yet to be processed. 70 | pub fn get_pending_chunks(&mut self) -> u32 { 71 | // UNWRAP: A "count" of chunks will never be negative 72 | u32::try_from(unsafe { vosk_batch_recognizer_get_pending_chunks(self.0.as_ptr()) }).unwrap() 73 | } 74 | } 75 | 76 | // SAFETY: Recognizer shares no state, so it is Send 77 | unsafe impl Send for BatchRecognizer {} 78 | // SAFETY: All methods that mutate Recognizer require a &mut to it, 79 | // which ensures exclusive access, so it is Sync 80 | unsafe impl Sync for BatchRecognizer {} 81 | 82 | impl Drop for BatchRecognizer { 83 | fn drop(&mut self) { 84 | unsafe { vosk_batch_recognizer_free(self.0.as_ptr()) } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /crates/vosk/src/recognition/errors.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | /// Possible errors that accept_waveform methods might return. 4 | #[derive(Error, Debug)] 5 | pub enum AcceptWaveformError { 6 | /// Error returned if the user passes in a buffer of a length 7 | /// that exceeds the maximum supported buffer length. 8 | #[error( 9 | "the length of the provided audio buffer was {0} (expected < {})", 10 | i32::MAX 11 | )] 12 | BufferTooLong(usize), 13 | } 14 | -------------------------------------------------------------------------------- /crates/vosk/src/recognition/mod.rs: -------------------------------------------------------------------------------- 1 | use std::os::raw::c_int; 2 | 3 | #[cfg(feature = "batch")] 4 | mod batch; 5 | mod errors; 6 | mod results; 7 | mod sequential; 8 | 9 | #[cfg(feature = "batch")] 10 | pub use batch::BatchRecognizer; 11 | pub use errors::*; 12 | pub use results::*; 13 | pub use sequential::Recognizer; 14 | 15 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 16 | /// State of the decodification after processing a chunk of data. 17 | pub enum DecodingState { 18 | /// Silence has occured and you can retrieve a new utterance with the [`Recognizer::result`]. 19 | Finalized, 20 | /// Decoding still continues. 21 | Running, 22 | /// Decoding failed in some way. 23 | Failed, 24 | } 25 | 26 | impl DecodingState { 27 | /// Returns the variant that corresponds to `value` in C. 28 | pub(self) fn from_c_int(value: c_int) -> Self { 29 | match value { 30 | 1 => Self::Finalized, 31 | 0 => Self::Running, 32 | _ => Self::Failed, 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /crates/vosk/src/recognition/results.rs: -------------------------------------------------------------------------------- 1 | //! 
Results of a recognition
2 | 
3 | use std::ffi::{c_char, CStr};
4 | 
5 | use serde::{Deserialize, Serialize};
6 | 
7 | /// A single word in a [`CompleteResultSingle`] and metadata about it.
8 | ///
9 | /// Unlike in [`WordInAlternative`], the confidence ([`conf`]) is part of each word,
10 | /// rather than part of an [`Alternative`].
11 | ///
12 | /// [`conf`]: Self::conf
13 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
14 | pub struct Word<'a> {
15 |     /// Confidence that this word is correct.
16 |     pub conf: f32,
17 | 
18 |     /// Time in seconds when the word starts.
19 |     pub start: f32,
20 | 
21 |     /// Time in seconds when the word ends.
22 |     pub end: f32,
23 | 
24 |     /// The transcribed word.
25 |     pub word: &'a str,
26 | }
27 | 
28 | /// A single word in an [`Alternative`] and metadata about it.
29 | ///
30 | /// Unlike [`Word`], it does not contain the confidence,
31 | /// as it is part of the [`Alternative`] itself.
32 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
33 | pub struct WordInAlternative<'a> {
34 |     /// Time in seconds when the word starts.
35 |     pub start: f32,
36 | 
37 |     /// Time in seconds when the word ends.
38 |     pub end: f32,
39 | 
40 |     /// The transcribed word.
41 |     pub word: &'a str,
42 | }
43 | 
44 | /// An alternative transcript in a [`CompleteResultMultiple`].
45 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
46 | pub struct Alternative<'a> {
47 |     /// Confidence of the recognizer that this is the correct alternative transcript.
48 |     pub confidence: f32,
49 | 
50 |     /// Collection of words present in [`text`] with metadata about them.
51 |     ///
52 |     /// Empty unless [`Recognizer::set_words`] is passed `true`.
53 |     ///
54 |     /// [`text`]: Self::text
55 |     /// [`Recognizer::set_words`]: crate::Recognizer::set_words
56 |     #[serde(default)]
57 |     pub result: Vec<WordInAlternative<'a>>,
58 | 
59 |     /// Full transcript text.
60 |     pub text: &'a str,
61 | }
62 | 
63 | /// Recognition result if [`Recognizer::set_max_alternatives`]
64 | /// is passed a non-zero value.
65 | ///
66 | /// Inner type of [`CompleteResult::Multiple`].
67 | ///
68 | /// [`Recognizer::set_max_alternatives`]: crate::Recognizer::set_max_alternatives
69 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
70 | pub struct CompleteResultMultiple<'a> {
71 |     /// All the possible results of the transcription, ordered from most to least likely.
72 |     #[serde(borrow)]
73 |     pub alternatives: Vec<Alternative<'a>>,
74 | }
75 | 
76 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
77 | /// Data useful for speaker identification.
78 | pub struct SpeakerInfo {
79 |     /// Speaker vector used for speaker identification.
80 |     #[serde(rename = "spk")]
81 |     pub vector: Vec<f32>,
82 | 
83 |     /// Data frames in which the speaker was not in silence.
84 |     #[serde(rename = "spk_frames")]
85 |     pub frames: u16,
86 | }
87 | 
88 | /// Recognition result if [`Recognizer::set_max_alternatives`]
89 | /// is passed a zero (default).
90 | ///
91 | /// Inner type of [`CompleteResult::Single`]. 
92 | ///
93 | /// [`Recognizer::set_max_alternatives`]: crate::Recognizer::set_max_alternatives
94 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
95 | pub struct CompleteResultSingle<'a> {
96 |     /// Information about the speaker, used for speaker identification
97 |     ///
98 |     /// Enabled if the [`Recognizer`] was passed a [`SpeakerModel`] with
99 |     /// [`Recognizer::new_with_speaker`] or [`Recognizer::set_speaker_model`],
100 |     /// [`None`] otherwise
101 |     ///
102 |     /// [`SpeakerModel`]: crate::SpeakerModel
103 |     /// [`Recognizer`]: crate::Recognizer
104 |     /// [`Recognizer::new_with_speaker`]: crate::Recognizer::new_with_speaker
105 |     /// [`Recognizer::set_speaker_model`]: crate::Recognizer::set_speaker_model
106 |     #[serde(flatten)]
107 |     pub speaker_info: Option<SpeakerInfo>,
108 | 
109 |     /// Collection of words present in [`text`] with metadata about them.
110 |     ///
111 |     /// Empty unless [`Recognizer::set_words`] is passed `true`.
112 |     ///
113 |     /// [`text`]: Self::text
114 |     /// [`Recognizer::set_words`]: crate::Recognizer::set_words
115 |     #[serde(default)]
116 |     pub result: Vec<Word<'a>>,
117 | 
118 |     /// Full text of the transcript.
119 |     pub text: &'a str,
120 | }
121 | 
122 | /// Different results that can be returned from
123 | /// [`Recognizer::result`] and [`Recognizer::final_result`].
124 | ///
125 | /// [`Recognizer::result`]: crate::Recognizer::result
126 | /// [`Recognizer::final_result`]: crate::Recognizer::final_result
127 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
128 | #[serde(untagged)]
129 | pub enum CompleteResult<'a> {
130 |     /// Result if [`Recognizer::set_max_alternatives`] is passed zero (default).
131 |     ///
132 |     /// [`Recognizer::set_max_alternatives`]: crate::Recognizer::set_max_alternatives
133 |     #[serde(borrow)]
134 |     Single(CompleteResultSingle<'a>),
135 | 
136 |     /// Result if [`Recognizer::set_max_alternatives`] is passed a non-zero value.
137 |     ///
138 |     /// [`Recognizer::set_max_alternatives`]: crate::Recognizer::set_max_alternatives
139 |     Multiple(CompleteResultMultiple<'a>),
140 | }
141 | 
142 | impl<'a> CompleteResult<'a> {
143 |     /// Returns the inner [`CompleteResultSingle`] if `self` was [`Single`], and [`None`] otherwise.
144 |     ///
145 |     /// [`Single`]: Self::Single
146 |     #[must_use]
147 |     pub fn single(self) -> Option<CompleteResultSingle<'a>> {
148 |         match self {
149 |             Self::Single(x) => Some(x),
150 |             Self::Multiple(_) => None,
151 |         }
152 |     }
153 | 
154 |     /// Returns the inner [`CompleteResultMultiple`] if `self` was [`Multiple`], and [`None`] otherwise.
155 |     ///
156 |     /// [`Multiple`]: Self::Multiple
157 |     #[must_use]
158 |     pub fn multiple(self) -> Option<CompleteResultMultiple<'a>> {
159 |         match self {
160 |             Self::Single(_) => None,
161 |             Self::Multiple(x) => Some(x),
162 |         }
163 |     }
164 | }
165 | 
166 | /// Result returned by [`Recognizer::partial_result`].
167 | /// The result may change after processing more data as decoding is not yet complete.
168 | ///
169 | /// [`Recognizer::partial_result`]: crate::Recognizer::partial_result
170 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
171 | pub struct PartialResult<'a> {
172 |     // The "partial" JSON key will not be present if partial_result is called when the recognizer isn't running (DecodingState::Running).
173 |     // It makes sense to return an empty string in that case
174 |     /// Full text of the partial transcript.
175 |     #[serde(default)]
176 |     pub partial: &'a str,
177 | 
178 |     /// Collection of words present in [`partial`] with metadata about them. 
179 |     ///
180 |     /// Empty unless [`Recognizer::set_partial_words`] is passed `true`.
181 |     ///
182 |     /// [`partial`]: Self::partial
183 |     /// [`Recognizer::set_partial_words`]: crate::Recognizer::set_partial_words
184 |     #[serde(default)]
185 |     pub partial_result: Vec<Word<'a>>,
186 | }
187 | 
188 | /// Generic function to retrieve a given type of result from the recognizer.
189 | pub(super) unsafe fn result_from_json_c_str<'de, T: Deserialize<'de>>(ptr: *const c_char) -> T {
190 |     // UNWRAP: Panics in here will never be the caller's fault, but rather some
191 |     // edge case that was not thought of and should be reported, so it does not
192 |     // make sense to return a Result.
193 |     let string = CStr::from_ptr(ptr).to_str().unwrap();
194 |     serde_json::from_str(string).unwrap_or_else(|_| panic!("Invalid JSON: {string:?}"))
195 | }
196 | 
--------------------------------------------------------------------------------
/crates/vosk/src/recognition/sequential.rs:
--------------------------------------------------------------------------------
1 | use super::{
2 |     result_from_json_c_str, AcceptWaveformError, CompleteResult, DecodingState, PartialResult,
3 | };
4 | use crate::models::{Model, SpeakerModel};
5 | 
6 | use std::{ffi::CString, os::raw::c_int, ptr::NonNull};
7 | use vosk_sys::*;
8 | 
9 | /// The main object which processes data.
10 | /// Takes audio as input and returns decoded information as words, confidences, times, and other metadata.
11 | pub struct Recognizer(NonNull<VoskRecognizer>);
12 | 
13 | impl Recognizer {
14 |     /// Creates the recognizer object. Returns [`None`] if a problem occurred.
15 |     ///
16 |     /// Recognizers process speech and return text using shared model data.
17 |     ///
18 |     /// * `model` - [`Model`] containing static data for the recognizer. A model can be shared
19 |     ///   across recognizers, even ones running in different threads.
20 |     ///
21 |     /// * `sample_rate` - The sample rate of the audio you are going to feed into the recognizer.
22 |     ///   Make sure this rate matches the audio content; a mismatch is a common cause of accuracy problems.
23 |     ///
24 |     /// [`Model`]: crate::Model
25 |     #[must_use]
26 |     pub fn new(model: &Model, sample_rate: f32) -> Option<Self> {
27 |         let recognizer_ptr = unsafe { vosk_recognizer_new(model.0.as_ptr(), sample_rate) };
28 |         Some(Self(NonNull::new(recognizer_ptr)?))
29 |     }
30 | 
31 |     /// Creates the recognizer object with speaker recognition. Returns [`None`] if a problem occurred.
32 |     ///
33 |     /// In speaker recognition mode the recognizer not only recognizes
34 |     /// text but also returns speaker vectors one can use for speaker identification.
35 |     ///
36 |     /// * `model` - [`Model`] containing the data for the recognizer. A model can be
37 |     ///   shared across recognizers, even ones running in different threads.
38 |     ///
39 |     /// * `sample_rate` - The sample rate of the audio you are going to feed into the recognizer.
40 |     ///   Make sure this rate matches the audio content; a mismatch is a common
41 |     ///   cause of accuracy problems.
42 |     ///
43 |     /// * `speaker_model` - Speaker model for speaker identification.
44 |     ///
45 |     /// [`Model`]: crate::Model
46 |     #[must_use]
47 |     pub fn new_with_speaker(
48 |         model: &Model,
49 |         sample_rate: f32,
50 |         speaker_model: &SpeakerModel,
51 |     ) -> Option<Self> {
52 |         let recognizer_ptr = unsafe {
53 |             vosk_recognizer_new_spk(model.0.as_ptr(), sample_rate, speaker_model.0.as_ptr())
54 |         };
55 | 
56 |         Some(Self(NonNull::new(recognizer_ptr)?))
57 |     }
58 | 
59 |     /// Creates a recognizer that only recognizes certain words.
60 |     /// Returns [`None`] if a problem occurred.
61 |     ///
62 |     /// When you don't need to recognize a large vocabulary, you can improve
63 |     /// accuracy by specifying a list of phrases to recognize. This
64 |     /// will improve recognizer speed and accuracy, but it might return \[unk\] if the user said
65 |     /// something outside the list.
66 |     ///
67 |     /// Only recognizers with lookahead models support this type of quick configuration.
68 |     /// Precompiled HCLG graph models are not supported.
69 |     ///
70 |     /// * `model` - [`Model`] containing the data for the recognizer. A model can be shared
71 |     ///   across recognizers, even ones running in different threads.
72 |     ///
73 |     /// * `sample_rate` - The sample rate of the audio you are going to feed into the recognizer.
74 |     ///   Make sure this rate matches the audio content; a mismatch is a common cause of accuracy problems.
75 |     ///
76 |     /// * `grammar` - The list of phrases to recognize.
77 |     ///
78 |     /// # Examples
79 |     ///
80 |     /// ```no_run
81 |     /// # use vosk::{Model, Recognizer};
82 |     /// #
83 |     /// let model = Model::new("/path/to/model").expect("Could not create a model");
84 |     /// let recognizer = Recognizer::new_with_grammar(
85 |     ///     &model,
86 |     ///     16000.0,
87 |     ///     &["one two three four five", "[unk]"],
88 |     /// )
89 |     /// .expect("Could not create a recognizer");
90 |     /// ```
91 |     ///
92 |     /// [`Model`]: crate::Model
93 |     #[must_use]
94 |     pub fn new_with_grammar(
95 |         model: &Model,
96 |         sample_rate: f32,
97 |         grammar: &[impl AsRef<str>],
98 |     ) -> Option<Self> {
99 |         let grammar_c = CString::new(format!(
100 |             "[{}]",
101 |             grammar
102 |                 .iter()
103 |                 .map(|phrase| format!("\"{}\"", phrase.as_ref()))
104 |                 .collect::<Vec<_>>()
105 |                 .join(", ")
106 |         ))
107 |         .ok()?;
108 |         let recognizer_ptr =
109 |             unsafe { vosk_recognizer_new_grm(model.0.as_ptr(), sample_rate, grammar_c.as_ptr()) };
110 | 
111 |         Some(Self(NonNull::new(recognizer_ptr)?))
112 |     }
113 | 
114 |     /// Adds a speaker model to an already initialized recognizer.
115 |     ///
116 |     /// Helps to initialize speaker recognition for a grammar-based recognizer,
117 |     /// which has no constructor that takes a speaker model.
118 |     pub fn set_speaker_model(&mut self, speaker_model: &SpeakerModel) {
119 |         unsafe { vosk_recognizer_set_spk_model(self.0.as_ptr(), speaker_model.0.as_ptr()) }
120 |     }
121 | 
122 |     /// Configures the recognizer to output n-best results in [`result`] and [`final_result`].
123 |     ///
124 |     /// The returned value from those methods will be a [`CompleteResult::Single`]
125 |     /// if `max_alternatives` is 0, and a [`CompleteResult::Multiple`] otherwise.
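    ///
    /// Below is a minimal sketch of how the returned shape changes once a non-zero
    /// value is set; the model path is a placeholder:
    ///
    /// ```no_run
    /// # use vosk::{CompleteResult, Model, Recognizer};
    /// let model = Model::new("/path/to/model").expect("Could not create a model");
    /// let mut recognizer = Recognizer::new(&model, 16000.0).expect("Could not create a recognizer");
    /// recognizer.set_max_alternatives(3);
    /// // ... feed audio with `accept_waveform` ...
    /// match recognizer.final_result() {
    ///     CompleteResult::Multiple(result) => {
    ///         for alternative in result.alternatives {
    ///             println!("{}: {}", alternative.confidence, alternative.text);
    ///         }
    ///     }
    ///     CompleteResult::Single(_) => unreachable!("max_alternatives is non-zero"),
    /// }
    /// ```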
126 |     ///
127 |     /// * `max_alternatives` - Maximum alternatives to return (may be fewer) (default: 0)
128 |     ///
129 |     /// [`result`]: Self::result
130 |     /// [`final_result`]: Self::final_result
131 |     /// [`CompleteResult::Single`]: crate::CompleteResult::Single
132 |     /// [`CompleteResult::Multiple`]: crate::CompleteResult::Multiple
133 |     pub fn set_max_alternatives(&mut self, max_alternatives: u16) {
134 |         unsafe { vosk_recognizer_set_max_alternatives(self.0.as_ptr(), max_alternatives as c_int) }
135 |     }
136 | 
137 |     /// Enables or disables words with metadata in the output, represented as:
138 |     ///
139 |     /// * [`WordInAlternative`] in a [`CompleteResult::Multiple`]
140 |     ///
141 |     /// * [`Word`] in a [`CompleteResult::Single`]
142 |     ///
143 |     /// [`WordInAlternative`]: crate::WordInAlternative
144 |     /// [`Word`]: crate::Word
145 |     /// [`CompleteResult::Multiple`]: crate::CompleteResult::Multiple
146 |     /// [`CompleteResult::Single`]: crate::CompleteResult::Single
147 |     pub fn set_words(&mut self, enable: bool) {
148 |         unsafe { vosk_recognizer_set_words(self.0.as_ptr(), i32::from(enable)) }
149 |     }
150 | 
151 |     /// Like [`set_words`], but for [`PartialResult`].
152 |     ///
153 |     /// Words will always be represented as [`Word`].
154 |     ///
155 |     /// [`set_words`]: Self::set_words
156 |     /// [`PartialResult`]: crate::PartialResult
157 |     /// [`Word`]: crate::Word
158 |     pub fn set_partial_words(&mut self, enable: bool) {
159 |         unsafe { vosk_recognizer_set_partial_words(self.0.as_ptr(), i32::from(enable)) }
160 |     }
161 | 
162 |     /// Enables or disables Natural Language Semantics Markup Language (NLSML) in the output.
163 |     pub fn set_nlsml(&mut self, enable: bool) {
164 |         unsafe { vosk_recognizer_set_nlsml(self.0.as_ptr(), i32::from(enable)) }
165 |     }
166 | 
167 |     /// Accepts and processes a new chunk of voice data.
168 |     ///
169 |     /// * `data` - Audio data in PCM 16-bit mono format.
170 |     ///
171 |     /// Returns a [`DecodingState`], which represents the state of the decoding
172 |     /// after this chunk of data has been processed, or an [`AcceptWaveformError`] if the buffer is longer than `i32::MAX` elements.
173 |     pub fn accept_waveform(&mut self, data: &[i16]) -> Result<DecodingState, AcceptWaveformError> {
174 |         // vosk_recognizer_accept_waveform and vosk_recognizer_accept_waveform_f are meant
175 |         // to be used by languages that do not have an i16 type (those functions also take PCM 16-bit audio,
176 |         // but represented as an f32 or i8). Those extra functions aren't needed in Rust, so they
177 |         // will not be wrapped.
178 | 
179 |         let len = data.len();
180 | 
181 |         let decoding_state = unsafe {
182 |             vosk_recognizer_accept_waveform_s(
183 |                 self.0.as_ptr(),
184 |                 data.as_ptr(),
185 |                 i32::try_from(len).map_err(|_| AcceptWaveformError::BufferTooLong(len))?,
186 |             )
187 |         };
188 | 
189 |         Ok(DecodingState::from_c_int(decoding_state))
190 |     }
191 | 
192 |     /// Returns the speech recognition result, waiting for silence (see [`DecodingState::Finalized`]) before giving one.
193 |     ///
194 |     /// The returned value will be a [`CompleteResult::Single`]
195 |     /// if [`set_max_alternatives`] was passed 0 (the default), and a
196 |     /// [`CompleteResult::Multiple`] otherwise.
197 |     ///
198 |     /// If words are enabled (see [`set_words`]), it also returns metadata about the words.
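    ///
    /// A minimal sketch of the feed/decode loop (the samples here are placeholder
    /// silence; real code would read them from a file or microphone):
    ///
    /// ```no_run
    /// # use vosk::{DecodingState, Model, Recognizer};
    /// # let model = Model::new("/path/to/model").unwrap();
    /// # let mut recognizer = Recognizer::new(&model, 16000.0).unwrap();
    /// let samples = vec![0i16; 3200];
    /// if let Ok(DecodingState::Finalized) = recognizer.accept_waveform(&samples) {
    ///     // Silence was detected, so a finalized utterance is available.
    ///     println!("{:#?}", recognizer.result());
    /// }
    /// ```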
199 |     ///
200 |     /// [`set_max_alternatives`]: Self::set_max_alternatives
201 |     /// [`set_words`]: Self::set_words
202 |     /// [`CompleteResult::Multiple`]: crate::CompleteResult::Multiple
203 |     /// [`CompleteResult::Single`]: crate::CompleteResult::Single
204 |     #[must_use]
205 |     pub fn result(&mut self) -> CompleteResult {
206 |         unsafe { result_from_json_c_str(vosk_recognizer_result(self.0.as_ptr())) }
207 |     }
208 | 
209 |     /// Returns the partial speech recognition result, which is not yet finalized and may change after
210 |     /// processing more data.
211 |     ///
212 |     /// If words are enabled (see [`set_partial_words`]), it also returns metadata about the words.
213 |     ///
214 |     /// [`set_partial_words`]: Self::set_partial_words
215 |     #[must_use]
216 |     pub fn partial_result(&mut self) -> PartialResult {
217 |         unsafe { result_from_json_c_str(vosk_recognizer_partial_result(self.0.as_ptr())) }
218 |     }
219 | 
220 |     /// Returns the speech recognition result. Like [`result`], but it does not
221 |     /// wait for silence and flushes the data so everything is processed.
222 |     ///
223 |     /// [`result`]: Self::result
224 |     #[must_use]
225 |     pub fn final_result(&mut self) -> CompleteResult {
226 |         unsafe { result_from_json_c_str(vosk_recognizer_final_result(self.0.as_ptr())) }
227 |     }
228 | 
229 |     /// Resets the current results and data so recognition can continue from scratch.
230 |     pub fn reset(&mut self) {
231 |         unsafe {
232 |             vosk_recognizer_reset(self.0.as_ptr());
233 |         }
234 |     }
235 | }
236 | 
237 | // SAFETY: Recognizer shares no state, so it is Send
238 | unsafe impl Send for Recognizer {}
239 | // SAFETY: All methods that mutate Recognizer require a &mut to it,
240 | // which ensures exclusive access, so it is Sync
241 | unsafe impl Sync for Recognizer {}
242 | 
243 | impl Drop for Recognizer {
244 |     fn drop(&mut self) {
245 |         unsafe { vosk_recognizer_free(self.0.as_ptr()) }
246 |     }
247 | }
248 | 
--------------------------------------------------------------------------------
/flake.lock:
--------------------------------------------------------------------------------
1 | {
2 |   "nodes": {
3 |     "flake-compat": {
4 |       "flake": false,
5 |       "locked": {
6 |         "lastModified": 1650374568,
7 |         "narHash": "sha256-Z+s0J8/r907g149rllvwhb4pKi8Wam5ij0st8PwAh+E=",
8 |         "owner": "edolstra",
9 |         "repo": "flake-compat",
10 |         "rev": "b4a34015c698c7793d592d66adbab377907a2be8",
11 |         "type": "github"
12 |       },
13 |       "original": {
14 |         "owner": "edolstra",
15 |         "repo": "flake-compat",
16 |         "type": "github"
17 |       }
18 |     },
19 |     "flake-utils": {
20 |       "inputs": {
21 |         "systems": "systems"
22 |       },
23 |       "locked": {
24 |         "lastModified": 1694529238,
25 |         "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=",
26 |         "owner": "numtide",
27 |         "repo": "flake-utils",
28 |         "rev": "ff7b65b44d01cf9ba6a71320833626af21126384",
29 |         "type": "github"
30 |       },
31 |       "original": {
32 |         "owner": "numtide",
33 |         "repo": "flake-utils",
34 |         "type": "github"
35 |       }
36 |     },
37 |     "flake-utils-plus": {
38 |       "inputs": {
39 |         "flake-utils": "flake-utils"
40 |       },
41 |       "locked": {
42 |         "lastModified": 1715533576,
43 |         "narHash": "sha256-fT4ppWeCJ0uR300EH3i7kmgRZnAVxrH+XtK09jQWihk=",
44 |         "owner": "gytis-ivaskevicius",
45 |         "repo": "flake-utils-plus",
46 |         "rev": "3542fe9126dc492e53ddd252bb0260fe035f2c0f",
47 |         "type": "github"
48 |       },
49 |       "original": {
50 |         "owner": "gytis-ivaskevicius",
51 |         "repo": "flake-utils-plus",
52 |         "rev": "3542fe9126dc492e53ddd252bb0260fe035f2c0f",
53 |         "type": "github"
54 |       }
55 |     },
56 |     "nixpkgs": {
57 |       "locked": {
58 |         "lastModified": 
1729755165,
59 |         "narHash": "sha256-6IpnOHWsaSSjT3yvqlrWfHW6HVCT+wOAlUpcooGJ+FQ=",
60 |         "owner": "NixOS",
61 |         "repo": "nixpkgs",
62 |         "rev": "cabaf14d3e69c9921d7acedf5d7d60bb2b90be02",
63 |         "type": "github"
64 |       },
65 |       "original": {
66 |         "owner": "NixOS",
67 |         "ref": "nixpkgs-unstable",
68 |         "repo": "nixpkgs",
69 |         "type": "github"
70 |       }
71 |     },
72 |     "root": {
73 |       "inputs": {
74 |         "nixpkgs": "nixpkgs",
75 |         "rust-overlay": "rust-overlay",
76 |         "snowfall-lib": "snowfall-lib"
77 |       }
78 |     },
79 |     "rust-overlay": {
80 |       "inputs": {
81 |         "nixpkgs": [
82 |           "nixpkgs"
83 |         ]
84 |       },
85 |       "locked": {
86 |         "lastModified": 1729736953,
87 |         "narHash": "sha256-Rb6JUop7NRklg0uzcre+A+Ebrn/ZiQPkm4QdKg6/3pw=",
88 |         "owner": "oxalica",
89 |         "repo": "rust-overlay",
90 |         "rev": "29b1275740d9283467b8117499ec8cbb35250584",
91 |         "type": "github"
92 |       },
93 |       "original": {
94 |         "owner": "oxalica",
95 |         "repo": "rust-overlay",
96 |         "type": "github"
97 |       }
98 |     },
99 |     "snowfall-lib": {
100 |       "inputs": {
101 |         "flake-compat": "flake-compat",
102 |         "flake-utils-plus": "flake-utils-plus",
103 |         "nixpkgs": [
104 |           "nixpkgs"
105 |         ]
106 |       },
107 |       "locked": {
108 |         "lastModified": 1719005984,
109 |         "narHash": "sha256-mpFl3Jv4fKnn+5znYXG6SsBjfXHJdRG5FEqNSPx0GLA=",
110 |         "owner": "snowfallorg",
111 |         "repo": "lib",
112 |         "rev": "c6238c83de101729c5de3a29586ba166a9a65622",
113 |         "type": "github"
114 |       },
115 |       "original": {
116 |         "owner": "snowfallorg",
117 |         "repo": "lib",
118 |         "type": "github"
119 |       }
120 |     },
121 |     "systems": {
122 |       "locked": {
123 |         "lastModified": 1681028828,
124 |         "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
125 |         "owner": "nix-systems",
126 |         "repo": "default",
127 |         "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
128 |         "type": "github"
129 |       },
130 |       "original": {
131 |         "owner": "nix-systems",
132 |         "repo": "default",
133 |         "type": "github"
134 |       }
135 |     }
136 |   },
137 |   "root": "root",
138 |   "version": 7
139 | }
140 | 
--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
1 | {
2 |   inputs = {
3 |     nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
4 |     snowfall-lib = {
5 |       url = "github:snowfallorg/lib";
6 |       inputs.nixpkgs.follows = "nixpkgs";
7 |     };
8 |     rust-overlay = {
9 |       url = "github:oxalica/rust-overlay";
10 |       inputs.nixpkgs.follows = "nixpkgs";
11 |     };
12 |   };
13 | 
14 |   outputs = { snowfall-lib, rust-overlay, ... } @ inputs:
15 |     snowfall-lib.mkFlake {
16 |       inherit inputs;
17 |       src = ./nix;
18 | 
19 |       overlays = [
20 |         (import rust-overlay)
21 |       ];
22 |     };
23 | }
--------------------------------------------------------------------------------
/nix/shells/default/default.nix:
--------------------------------------------------------------------------------
1 | { pkgs, lib, ...
}:
2 | let
3 |   # Libs and model files are needed to run the examples for testing purposes.
4 |   voskVersion = "0.3.45";
5 |   arch = builtins.elemAt (lib.strings.splitString "-" pkgs.system) 0;
6 |   voskLib = pkgs.fetchzip {
7 |     url = "https://github.com/alphacep/vosk-api/releases/download/v${voskVersion}/vosk-linux-${arch}-${voskVersion}.zip";
8 |     hash = "sha256-ToMDbD5ooFMHU0nNlfpLynF29kkfMknBluKO5PipLFY=";
9 |   };
10 |   model = pkgs.fetchzip {
11 |     url = "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip";
12 |     hash = "sha256-CIoPZ/krX+UW2w7c84W3oc1n4zc9BBS/fc8rVYUthuY=";
13 |   };
14 |   speakerModel = pkgs.fetchzip {
15 |     url = "https://alphacephei.com/vosk/models/vosk-model-spk-0.4.zip";
16 |     hash = "sha256-wpTfZnEL1sCfpLhp+l62d8GcOinR15XnSHaLVASH4RA=";
17 |   };
18 | in
19 | pkgs.mkShell {
20 |   buildInputs = with pkgs; [
21 |     (rust-bin.stable.latest.default.override {
22 |       extensions = [ "rust-src" ];
23 |     })
24 |     alsa-lib
25 |   ];
26 |   nativeBuildInputs = with pkgs; [ pkg-config ];
27 | 
28 |   RUSTFLAGS = "-L${voskLib}";
29 |   LD_LIBRARY_PATH = lib.makeLibraryPath [
30 |     pkgs.stdenv.cc.cc
31 |     voskLib
32 |   ];
33 | 
34 |   # Model paths for running the examples, exposed to the shell as $MODEL and $SPEAKER_MODEL.
35 |   MODEL = model;
36 |   SPEAKER_MODEL = speakerModel;
37 | }
--------------------------------------------------------------------------------