├── .editorconfig ├── .github └── workflows │ └── deepspeech.yml ├── .gitignore ├── .gitmodules ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── bg ├── Cargo.toml └── src │ └── main.rs ├── ci.sh ├── examples ├── client.rs ├── client_extended.rs └── client_simple.rs ├── src ├── dynamic_bindings.rs ├── errors.rs └── lib.rs └── sys ├── Cargo.toml ├── LICENSE ├── build.rs └── src ├── bindings.rs └── lib.rs /.editorconfig: -------------------------------------------------------------------------------- 1 | # top-most EditorConfig file 2 | root = true 3 | 4 | [*] 5 | indent_style = tab 6 | tab_width = 4 7 | end_of_line=lf 8 | charset=utf-8 9 | trim_trailing_whitespace=true 10 | insert_final_newline=true 11 | -------------------------------------------------------------------------------- /.github/workflows/deepspeech.yml: -------------------------------------------------------------------------------- 1 | name: deepspeech 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build-and-test: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | toolchain: [stable, beta, nightly] 11 | steps: 12 | - uses: actions/checkout@v2 13 | with: 14 | submodules: true 15 | - name: Install ${{ matrix.toolchain }} 16 | uses: actions-rs/toolchain@v1 17 | with: 18 | toolchain: ${{ matrix.toolchain }} 19 | - name: Run cargo build 20 | run: cargo build --verbose 21 | - name: Run cargo doc 22 | run: cargo doc --verbose 23 | - name: run ci.sh 24 | run: ./ci.sh 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | sys/target/ 3 | **/*.rs.bk 4 | *swp 5 | sys/Cargo.lock 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "sys/deepspeech"] 2 | path = sys/deepspeech 3 | url = 
https://github.com/mozilla/deepspeech 4 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "alac" 7 | version = "0.5.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "498a34d3cad5f3b23cc217ab489424ebcfffed186e30ad5ac02624e50df2c2b8" 10 | dependencies = [ 11 | "mp4parse", 12 | ] 13 | 14 | [[package]] 15 | name = "audrey" 16 | version = "0.3.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "58b92a84e89497e3cd25d3672cd5d1c288abaac02c18ff21283f17d118b889b8" 19 | dependencies = [ 20 | "alac", 21 | "caf", 22 | "claxon", 23 | "dasp_frame", 24 | "dasp_sample", 25 | "hound", 26 | "lewton", 27 | ] 28 | 29 | [[package]] 30 | name = "autocfg" 31 | version = "1.0.1" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" 34 | 35 | [[package]] 36 | name = "bg" 37 | version = "0.1.0" 38 | dependencies = [ 39 | "bindgen", 40 | "proc-macro2", 41 | "quote", 42 | "syn", 43 | ] 44 | 45 | [[package]] 46 | name = "bindgen" 47 | version = "0.57.0" 48 | source = "registry+https://github.com/rust-lang/crates.io-index" 49 | checksum = "fd4865004a46a0aafb2a0a5eb19d3c9fc46ee5f063a6cfc605c69ac9ecf5263d" 50 | dependencies = [ 51 | "bitflags", 52 | "cexpr", 53 | "clang-sys", 54 | "lazy_static", 55 | "lazycell", 56 | "peeking_take_while", 57 | "proc-macro2", 58 | "quote", 59 | "regex", 60 | "rustc-hash", 61 | "shlex", 62 | ] 63 | 64 | [[package]] 65 | name = "bitflags" 66 | version = "1.2.1" 67 | source = "registry+https://github.com/rust-lang/crates.io-index" 68 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 69 | 70 | 
[[package]] 71 | name = "bitreader" 72 | version = "0.3.3" 73 | source = "registry+https://github.com/rust-lang/crates.io-index" 74 | checksum = "70a57a98015fc89125fae6054685a2586739fba82c8dbfe550dac9a5a76791a6" 75 | dependencies = [ 76 | "cfg-if", 77 | ] 78 | 79 | [[package]] 80 | name = "byteorder" 81 | version = "1.4.2" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b" 84 | 85 | [[package]] 86 | name = "caf" 87 | version = "0.1.0" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "8fb1715abe8ffd1caeca4e30b9d803ecb75698bef0e59d207d4b0f16757744e7" 90 | dependencies = [ 91 | "byteorder", 92 | ] 93 | 94 | [[package]] 95 | name = "cexpr" 96 | version = "0.4.0" 97 | source = "registry+https://github.com/rust-lang/crates.io-index" 98 | checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27" 99 | dependencies = [ 100 | "nom", 101 | ] 102 | 103 | [[package]] 104 | name = "cfg-if" 105 | version = "1.0.0" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 108 | 109 | [[package]] 110 | name = "clang-sys" 111 | version = "1.1.1" 112 | source = "registry+https://github.com/rust-lang/crates.io-index" 113 | checksum = "f54d78e30b388d4815220c8dd03fea5656b6c6d32adb59e89061552a102f8da1" 114 | dependencies = [ 115 | "glob", 116 | "libc", 117 | "libloading", 118 | ] 119 | 120 | [[package]] 121 | name = "claxon" 122 | version = "0.4.3" 123 | source = "registry+https://github.com/rust-lang/crates.io-index" 124 | checksum = "4bfbf56724aa9eca8afa4fcfadeb479e722935bb2a0900c2d37e0cc477af0688" 125 | 126 | [[package]] 127 | name = "dasp_envelope" 128 | version = "0.11.0" 129 | source = "registry+https://github.com/rust-lang/crates.io-index" 130 | checksum = "8ec617ce7016f101a87fe85ed44180839744265fae73bb4aa43e7ece1b7668b6" 131 | 
dependencies = [ 132 | "dasp_frame", 133 | "dasp_peak", 134 | "dasp_ring_buffer", 135 | "dasp_rms", 136 | "dasp_sample", 137 | ] 138 | 139 | [[package]] 140 | name = "dasp_frame" 141 | version = "0.11.0" 142 | source = "registry+https://github.com/rust-lang/crates.io-index" 143 | checksum = "b2a3937f5fe2135702897535c8d4a5553f8b116f76c1529088797f2eee7c5cd6" 144 | dependencies = [ 145 | "dasp_sample", 146 | ] 147 | 148 | [[package]] 149 | name = "dasp_interpolate" 150 | version = "0.11.0" 151 | source = "registry+https://github.com/rust-lang/crates.io-index" 152 | checksum = "7fc975a6563bb7ca7ec0a6c784ead49983a21c24835b0bc96eea11ee407c7486" 153 | dependencies = [ 154 | "dasp_frame", 155 | "dasp_ring_buffer", 156 | "dasp_sample", 157 | ] 158 | 159 | [[package]] 160 | name = "dasp_peak" 161 | version = "0.11.0" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "5cf88559d79c21f3d8523d91250c397f9a15b5fc72fbb3f87fdb0a37b79915bf" 164 | dependencies = [ 165 | "dasp_frame", 166 | "dasp_sample", 167 | ] 168 | 169 | [[package]] 170 | name = "dasp_ring_buffer" 171 | version = "0.11.0" 172 | source = "registry+https://github.com/rust-lang/crates.io-index" 173 | checksum = "07d79e19b89618a543c4adec9c5a347fe378a19041699b3278e616e387511ea1" 174 | 175 | [[package]] 176 | name = "dasp_rms" 177 | version = "0.11.0" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | checksum = "a6c5dcb30b7e5014486e2822537ea2beae50b19722ffe2ed7549ab03774575aa" 180 | dependencies = [ 181 | "dasp_frame", 182 | "dasp_ring_buffer", 183 | "dasp_sample", 184 | ] 185 | 186 | [[package]] 187 | name = "dasp_sample" 188 | version = "0.11.0" 189 | source = "registry+https://github.com/rust-lang/crates.io-index" 190 | checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f" 191 | 192 | [[package]] 193 | name = "dasp_signal" 194 | version = "0.11.0" 195 | source = "registry+https://github.com/rust-lang/crates.io-index" 196 | 
checksum = "aa1ab7d01689c6ed4eae3d38fe1cea08cba761573fbd2d592528d55b421077e7" 197 | dependencies = [ 198 | "dasp_envelope", 199 | "dasp_frame", 200 | "dasp_interpolate", 201 | "dasp_peak", 202 | "dasp_ring_buffer", 203 | "dasp_rms", 204 | "dasp_sample", 205 | "dasp_window", 206 | ] 207 | 208 | [[package]] 209 | name = "dasp_window" 210 | version = "0.11.0" 211 | source = "registry+https://github.com/rust-lang/crates.io-index" 212 | checksum = "66bcb90ea007ba45fc48d426e28af3e8a653634f9a7174d768dcfe90fa6211f4" 213 | dependencies = [ 214 | "dasp_sample", 215 | ] 216 | 217 | [[package]] 218 | name = "deepspeech" 219 | version = "0.9.0" 220 | dependencies = [ 221 | "audrey", 222 | "dasp_interpolate", 223 | "dasp_signal", 224 | "deepspeech-sys", 225 | "libc", 226 | "libloading", 227 | ] 228 | 229 | [[package]] 230 | name = "deepspeech-sys" 231 | version = "0.9.1" 232 | 233 | [[package]] 234 | name = "glob" 235 | version = "0.3.0" 236 | source = "registry+https://github.com/rust-lang/crates.io-index" 237 | checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" 238 | 239 | [[package]] 240 | name = "hound" 241 | version = "3.4.0" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "8a164bb2ceaeff4f42542bdb847c41517c78a60f5649671b2a07312b6e117549" 244 | 245 | [[package]] 246 | name = "lazy_static" 247 | version = "1.4.0" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 250 | 251 | [[package]] 252 | name = "lazycell" 253 | version = "1.3.0" 254 | source = "registry+https://github.com/rust-lang/crates.io-index" 255 | checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" 256 | 257 | [[package]] 258 | name = "lewton" 259 | version = "0.9.4" 260 | source = "registry+https://github.com/rust-lang/crates.io-index" 261 | checksum = "8d542c1a317036c45c2aa1cf10cc9d403ca91eb2d333ef1a4917e5cb10628bd0" 
262 | dependencies = [ 263 | "byteorder", 264 | "ogg", 265 | "smallvec", 266 | ] 267 | 268 | [[package]] 269 | name = "libc" 270 | version = "0.2.87" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "265d751d31d6780a3f956bb5b8022feba2d94eeee5a84ba64f4212eedca42213" 273 | 274 | [[package]] 275 | name = "libloading" 276 | version = "0.7.0" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a" 279 | dependencies = [ 280 | "cfg-if", 281 | "winapi", 282 | ] 283 | 284 | [[package]] 285 | name = "log" 286 | version = "0.4.14" 287 | source = "registry+https://github.com/rust-lang/crates.io-index" 288 | checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" 289 | dependencies = [ 290 | "cfg-if", 291 | ] 292 | 293 | [[package]] 294 | name = "maybe-uninit" 295 | version = "2.0.0" 296 | source = "registry+https://github.com/rust-lang/crates.io-index" 297 | checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" 298 | 299 | [[package]] 300 | name = "memchr" 301 | version = "2.3.4" 302 | source = "registry+https://github.com/rust-lang/crates.io-index" 303 | checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" 304 | 305 | [[package]] 306 | name = "mp4parse" 307 | version = "0.10.1" 308 | source = "registry+https://github.com/rust-lang/crates.io-index" 309 | checksum = "7316728464443fe5793a805dde3257864e9690cf46374daff3ce93de1df2f254" 310 | dependencies = [ 311 | "bitreader", 312 | "byteorder", 313 | "log", 314 | "num-traits", 315 | ] 316 | 317 | [[package]] 318 | name = "nom" 319 | version = "5.1.2" 320 | source = "registry+https://github.com/rust-lang/crates.io-index" 321 | checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" 322 | dependencies = [ 323 | "memchr", 324 | "version_check", 325 | ] 326 | 327 | [[package]] 328 | name = "num-traits" 329 
| version = "0.2.14" 330 | source = "registry+https://github.com/rust-lang/crates.io-index" 331 | checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" 332 | dependencies = [ 333 | "autocfg", 334 | ] 335 | 336 | [[package]] 337 | name = "ogg" 338 | version = "0.7.1" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "13e571c3517af9e1729d4c63571a27edd660ade0667973bfc74a67c660c2b651" 341 | dependencies = [ 342 | "byteorder", 343 | ] 344 | 345 | [[package]] 346 | name = "peeking_take_while" 347 | version = "0.1.2" 348 | source = "registry+https://github.com/rust-lang/crates.io-index" 349 | checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" 350 | 351 | [[package]] 352 | name = "proc-macro2" 353 | version = "1.0.24" 354 | source = "registry+https://github.com/rust-lang/crates.io-index" 355 | checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" 356 | dependencies = [ 357 | "unicode-xid", 358 | ] 359 | 360 | [[package]] 361 | name = "quote" 362 | version = "1.0.9" 363 | source = "registry+https://github.com/rust-lang/crates.io-index" 364 | checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" 365 | dependencies = [ 366 | "proc-macro2", 367 | ] 368 | 369 | [[package]] 370 | name = "regex" 371 | version = "1.4.3" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a" 374 | dependencies = [ 375 | "regex-syntax", 376 | ] 377 | 378 | [[package]] 379 | name = "regex-syntax" 380 | version = "0.6.22" 381 | source = "registry+https://github.com/rust-lang/crates.io-index" 382 | checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" 383 | 384 | [[package]] 385 | name = "rustc-hash" 386 | version = "1.1.0" 387 | source = "registry+https://github.com/rust-lang/crates.io-index" 388 | checksum = 
"08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 389 | 390 | [[package]] 391 | name = "shlex" 392 | version = "0.1.1" 393 | source = "registry+https://github.com/rust-lang/crates.io-index" 394 | checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" 395 | 396 | [[package]] 397 | name = "smallvec" 398 | version = "0.6.14" 399 | source = "registry+https://github.com/rust-lang/crates.io-index" 400 | checksum = "b97fcaeba89edba30f044a10c6a3cc39df9c3f17d7cd829dd1446cab35f890e0" 401 | dependencies = [ 402 | "maybe-uninit", 403 | ] 404 | 405 | [[package]] 406 | name = "syn" 407 | version = "1.0.60" 408 | source = "registry+https://github.com/rust-lang/crates.io-index" 409 | checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081" 410 | dependencies = [ 411 | "proc-macro2", 412 | "quote", 413 | "unicode-xid", 414 | ] 415 | 416 | [[package]] 417 | name = "unicode-xid" 418 | version = "0.2.1" 419 | source = "registry+https://github.com/rust-lang/crates.io-index" 420 | checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" 421 | 422 | [[package]] 423 | name = "version_check" 424 | version = "0.9.2" 425 | source = "registry+https://github.com/rust-lang/crates.io-index" 426 | checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" 427 | 428 | [[package]] 429 | name = "winapi" 430 | version = "0.3.9" 431 | source = "registry+https://github.com/rust-lang/crates.io-index" 432 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 433 | dependencies = [ 434 | "winapi-i686-pc-windows-gnu", 435 | "winapi-x86_64-pc-windows-gnu", 436 | ] 437 | 438 | [[package]] 439 | name = "winapi-i686-pc-windows-gnu" 440 | version = "0.4.0" 441 | source = "registry+https://github.com/rust-lang/crates.io-index" 442 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 443 | 444 | [[package]] 445 | name = "winapi-x86_64-pc-windows-gnu" 446 | 
version = "0.4.0" 447 | source = "registry+https://github.com/rust-lang/crates.io-index" 448 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 449 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["sys", "bg"] 3 | 4 | [package] 5 | name = "deepspeech" 6 | description = "Rust bindings to Mozilla's DeepSpeech STT implementation" 7 | license = "MIT OR Apache-2.0" 8 | documentation = "https://docs.rs/deepspeech" 9 | repository = "https://github.com/RustAudio/deepspeech-rs" 10 | version = "0.9.1" 11 | authors = ["est31 "] 12 | readme = "README.md" 13 | 14 | [dev-dependencies] 15 | audrey = "0.3" 16 | dasp_interpolate = { version = "0.11", features = ["linear"] } 17 | dasp_signal = "0.11" 18 | 19 | [dependencies] 20 | libc = "0.2" 21 | deepspeech-sys = { version = "0.9", path = "sys", optional = true } 22 | libloading = { version = "0.7", optional = true } 23 | 24 | [features] 25 | default = ["static_bindings"] 26 | dynamic = ["libloading"] 27 | static_bindings = ["deepspeech-sys"] 28 | 29 | [badges] 30 | maintenance = { status = "deprecated" } 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 est31 and contributors 2 | 3 | Licensed under MIT or Apache License 2.0, 4 | at your option. 5 | 6 | The full list of contributors can be obtained by looking 7 | at the VCS log (originally, this crate was git versioned, 8 | there you can do "git shortlog -sn" for this task). 
9 | 10 | MIT License 11 | ----------- 12 | 13 | The MIT License (MIT) 14 | 15 | Copyright (c) 2017 est31 and contributors 16 | 17 | Permission is hereby granted, free of charge, to any person obtaining a copy 18 | of this software and associated documentation files (the "Software"), to deal 19 | in the Software without restriction, including without limitation the rights 20 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 21 | copies of the Software, and to permit persons to whom the Software is 22 | furnished to do so, subject to the following conditions: 23 | 24 | The above copyright notice and this permission notice shall be included in all 25 | copies or substantial portions of the Software. 26 | 27 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 28 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 29 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 30 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 31 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 32 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 | SOFTWARE. 34 | 35 | 36 | 37 | Apache License, version 2.0 38 | --------------------------- 39 | Apache License 40 | Version 2.0, January 2004 41 | http://www.apache.org/licenses/ 42 | 43 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 44 | 45 | 1. Definitions. 46 | 47 | "License" shall mean the terms and conditions for use, reproduction, 48 | and distribution as defined by Sections 1 through 9 of this document. 49 | 50 | "Licensor" shall mean the copyright owner or entity authorized by 51 | the copyright owner that is granting the License. 52 | 53 | "Legal Entity" shall mean the union of the acting entity and all 54 | other entities that control, are controlled by, or are under common 55 | control with that entity. 
For the purposes of this definition, 56 | "control" means (i) the power, direct or indirect, to cause the 57 | direction or management of such entity, whether by contract or 58 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 59 | outstanding shares, or (iii) beneficial ownership of such entity. 60 | 61 | "You" (or "Your") shall mean an individual or Legal Entity 62 | exercising permissions granted by this License. 63 | 64 | "Source" form shall mean the preferred form for making modifications, 65 | including but not limited to software source code, documentation 66 | source, and configuration files. 67 | 68 | "Object" form shall mean any form resulting from mechanical 69 | transformation or translation of a Source form, including but 70 | not limited to compiled object code, generated documentation, 71 | and conversions to other media types. 72 | 73 | "Work" shall mean the work of authorship, whether in Source or 74 | Object form, made available under the License, as indicated by a 75 | copyright notice that is included in or attached to the work 76 | (an example is provided in the Appendix below). 77 | 78 | "Derivative Works" shall mean any work, whether in Source or Object 79 | form, that is based on (or derived from) the Work and for which the 80 | editorial revisions, annotations, elaborations, or other modifications 81 | represent, as a whole, an original work of authorship. For the purposes 82 | of this License, Derivative Works shall not include works that remain 83 | separable from, or merely link (or bind by name) to the interfaces of, 84 | the Work and Derivative Works thereof. 
85 | 86 | "Contribution" shall mean any work of authorship, including 87 | the original version of the Work and any modifications or additions 88 | to that Work or Derivative Works thereof, that is intentionally 89 | submitted to Licensor for inclusion in the Work by the copyright owner 90 | or by an individual or Legal Entity authorized to submit on behalf of 91 | the copyright owner. For the purposes of this definition, "submitted" 92 | means any form of electronic, verbal, or written communication sent 93 | to the Licensor or its representatives, including but not limited to 94 | communication on electronic mailing lists, source code control systems, 95 | and issue tracking systems that are managed by, or on behalf of, the 96 | Licensor for the purpose of discussing and improving the Work, but 97 | excluding communication that is conspicuously marked or otherwise 98 | designated in writing by the copyright owner as "Not a Contribution." 99 | 100 | "Contributor" shall mean Licensor and any individual or Legal Entity 101 | on behalf of whom a Contribution has been received by Licensor and 102 | subsequently incorporated within the Work. 103 | 104 | 2. Grant of Copyright License. Subject to the terms and conditions of 105 | this License, each Contributor hereby grants to You a perpetual, 106 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 107 | copyright license to reproduce, prepare Derivative Works of, 108 | publicly display, publicly perform, sublicense, and distribute the 109 | Work and such Derivative Works in Source or Object form. 110 | 111 | 3. Grant of Patent License. 
Subject to the terms and conditions of 112 | this License, each Contributor hereby grants to You a perpetual, 113 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 114 | (except as stated in this section) patent license to make, have made, 115 | use, offer to sell, sell, import, and otherwise transfer the Work, 116 | where such license applies only to those patent claims licensable 117 | by such Contributor that are necessarily infringed by their 118 | Contribution(s) alone or by combination of their Contribution(s) 119 | with the Work to which such Contribution(s) was submitted. If You 120 | institute patent litigation against any entity (including a 121 | cross-claim or counterclaim in a lawsuit) alleging that the Work 122 | or a Contribution incorporated within the Work constitutes direct 123 | or contributory patent infringement, then any patent licenses 124 | granted to You under this License for that Work shall terminate 125 | as of the date such litigation is filed. 126 | 127 | 4. Redistribution. 
You may reproduce and distribute copies of the 128 | Work or Derivative Works thereof in any medium, with or without 129 | modifications, and in Source or Object form, provided that You 130 | meet the following conditions: 131 | 132 | (a) You must give any other recipients of the Work or 133 | Derivative Works a copy of this License; and 134 | 135 | (b) You must cause any modified files to carry prominent notices 136 | stating that You changed the files; and 137 | 138 | (c) You must retain, in the Source form of any Derivative Works 139 | that You distribute, all copyright, patent, trademark, and 140 | attribution notices from the Source form of the Work, 141 | excluding those notices that do not pertain to any part of 142 | the Derivative Works; and 143 | 144 | (d) If the Work includes a "NOTICE" text file as part of its 145 | distribution, then any Derivative Works that You distribute must 146 | include a readable copy of the attribution notices contained 147 | within such NOTICE file, excluding those notices that do not 148 | pertain to any part of the Derivative Works, in at least one 149 | of the following places: within a NOTICE text file distributed 150 | as part of the Derivative Works; within the Source form or 151 | documentation, if provided along with the Derivative Works; or, 152 | within a display generated by the Derivative Works, if and 153 | wherever such third-party notices normally appear. The contents 154 | of the NOTICE file are for informational purposes only and 155 | do not modify the License. You may add Your own attribution 156 | notices within Derivative Works that You distribute, alongside 157 | or as an addendum to the NOTICE text from the Work, provided 158 | that such additional attribution notices cannot be construed 159 | as modifying the License. 
160 | 161 | You may add Your own copyright statement to Your modifications and 162 | may provide additional or different license terms and conditions 163 | for use, reproduction, or distribution of Your modifications, or 164 | for any such Derivative Works as a whole, provided Your use, 165 | reproduction, and distribution of the Work otherwise complies with 166 | the conditions stated in this License. 167 | 168 | 5. Submission of Contributions. Unless You explicitly state otherwise, 169 | any Contribution intentionally submitted for inclusion in the Work 170 | by You to the Licensor shall be under the terms and conditions of 171 | this License, without any additional terms or conditions. 172 | Notwithstanding the above, nothing herein shall supersede or modify 173 | the terms of any separate license agreement you may have executed 174 | with Licensor regarding such Contributions. 175 | 176 | 6. Trademarks. This License does not grant permission to use the trade 177 | names, trademarks, service marks, or product names of the Licensor, 178 | except as required for reasonable and customary use in describing the 179 | origin of the Work and reproducing the content of the NOTICE file. 180 | 181 | 7. Disclaimer of Warranty. Unless required by applicable law or 182 | agreed to in writing, Licensor provides the Work (and each 183 | Contributor provides its Contributions) on an "AS IS" BASIS, 184 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 185 | implied, including, without limitation, any warranties or conditions 186 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 187 | PARTICULAR PURPOSE. You are solely responsible for determining the 188 | appropriateness of using or redistributing the Work and assume any 189 | risks associated with Your exercise of permissions under this License. 190 | 191 | 8. Limitation of Liability. 
In no event and under no legal theory, 192 | whether in tort (including negligence), contract, or otherwise, 193 | unless required by applicable law (such as deliberate and grossly 194 | negligent acts) or agreed to in writing, shall any Contributor be 195 | liable to You for damages, including any direct, indirect, special, 196 | incidental, or consequential damages of any character arising as a 197 | result of this License or out of the use or inability to use the 198 | Work (including but not limited to damages for loss of goodwill, 199 | work stoppage, computer failure or malfunction, or any and all 200 | other commercial damages or losses), even if such Contributor 201 | has been advised of the possibility of such damages. 202 | 203 | 9. Accepting Warranty or Additional Liability. While redistributing 204 | the Work or Derivative Works thereof, You may choose to offer, 205 | and charge a fee for, acceptance of support, warranty, indemnity, 206 | or other liability obligations and/or rights consistent with this 207 | License. However, in accepting such obligations, You may act only 208 | on Your own behalf and on Your sole responsibility, not on behalf 209 | of any other Contributor, and only if You agree to indemnify, 210 | defend, and hold each Contributor harmless for any liability 211 | incurred by, or claims asserted against, such Contributor by reason 212 | of your accepting any such warranty or additional liability. 213 | 214 | END OF TERMS AND CONDITIONS 215 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DEPRECATION NOTICE 2 | 3 | **This project is [deprecated](https://github.com/RustAudio/deepspeech-rs/issues/48) in favour of [coqui-stt](https://github.com/tazz4843/coqui-stt). 
4 | Please use that project instead.** 5 | 6 | # deepspeech-rs 7 | 8 | [![docs](https://docs.rs/deepspeech/badge.svg)](https://docs.rs/crate/deepspeech) 9 | [![crates.io](https://img.shields.io/crates/v/deepspeech.svg)](https://crates.io/crates/deepspeech) 10 | 11 | Rust bindings of [Mozilla's DeepSpeech](https://github.com/mozilla/DeepSpeech) library. 12 | 13 | The library is open source and performs Speech-To-Text completely offline. They provide pretrained models for English. 14 | 15 | ## Quickstart 16 | 17 | Preparation: 18 | 19 | 1. Obtain the DeepSpeech `native_client` library. The [release announcement] contains precompiled libraries for various targets. 20 | 2. Download the pretrained models named like `deepspeech-{version}-models.tar.gz` from the release announcement and extract the archive to some location. 21 | 3. Add the directory where the `native_client` library lies to your `LD_LIBRARY_PATH` and `LIBRARY_PATH` environment variables. 22 | 23 | You can now invoke the example via: 24 | 25 | ``` 26 | cargo run --release --example client 27 | ``` 28 | 29 | It will print out the recognized sequence on stdout. The format of the audio files is important: only mono files are supported for now. 30 | 31 | All codecs that the awesome [audrey](https://github.com/RustAudio/audrey) library supports are supported. 32 | 33 | See DeepSpeech's [release announcement] for more. 34 | 35 | [release announcement]: https://github.com/mozilla/DeepSpeech/releases/tag/v0.9.0 36 | 37 | ## Supported versions of DeepSpeech 38 | 39 | We currently support version `0.9.0` of the DeepSpeech library. 40 | We will always try to provide compatibility with the most recent release possible. 41 | 42 | ## License 43 | 44 | Licensed under Apache 2 or MIT (at your option). For details, see the [LICENSE](LICENSE) file. 45 | 46 | All examples inside the `examples/` folder are licensed under the 47 | [CC-0](https://creativecommons.org/publicdomain/zero/1.0/) license.
48 | 49 | The generated bindings (`sys` subdirectory in git, `-sys` crate on crates.io) fall under the Mozilla Public License, version 2.0. 50 | 51 | ### License of your contributions 52 | 53 | Unless you explicitly state otherwise, any contribution intentionally submitted for 54 | inclusion in the work by you, as defined in the Apache-2.0 license, 55 | shall be dual licensed / CC-0 licensed as above, without any additional terms or conditions. 56 | -------------------------------------------------------------------------------- /bg/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bg" 3 | description = "Bindgen wrapper" 4 | publish = false 5 | license = "MIT OR Apache-2.0" 6 | repository = "https://github.com/RustAudio/deepspeech-rs" 7 | version = "0.1.0" 8 | authors = ["est31 "] 9 | 10 | [dependencies] 11 | bindgen = { version = "0.57", default-features = false, features = ["runtime"] } 12 | proc-macro2 = { version = "1.0", default-features = false } 13 | syn = {version = "1.0.33", features = ["full", "extra-traits", "printing"]} 14 | quote = "1" -------------------------------------------------------------------------------- /bg/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate bindgen; 2 | extern crate proc_macro2; 3 | extern crate syn; 4 | extern crate quote; 5 | 6 | use proc_macro2::{Delimiter, Spacing, TokenStream, TokenTree}; 7 | use quote::ToTokens; 8 | use std::path::Path; 9 | use std::path::PathBuf; 10 | 11 | /// Get the function bindings that will either need to be attached to a dynamic library object 12 | /// or re-created into an `extern "C"` block. 
13 | fn get_binding_functions(file: &mut syn::File) -> Vec { 14 | let mut retvl = Vec::new(); 15 | // Iterate in reverse order by index so we can modify in-place 16 | for idx in (0..file.items.len()).rev() { 17 | // Just to be safe, also verify that they really are extern "C" blocks. 18 | let should_get = match file.items.get(idx) { 19 | Some(syn::Item::ForeignMod(m)) => { 20 | m.abi.name.as_ref().map_or(false, |l| &l.value() == "C") 21 | } 22 | _ => false, 23 | }; 24 | if !should_get { 25 | continue; 26 | } 27 | let mut binding = match file.items.remove(idx) { 28 | syn::Item::ForeignMod(m) => m, 29 | _ => unreachable!(), 30 | }; 31 | 32 | // Currently do not support generated function attributes. 33 | assert!(binding.attrs.is_empty()); 34 | for item in binding.items.drain(..) { 35 | match item { 36 | syn::ForeignItem::Fn(bind_fn) => { 37 | retvl.push(bind_fn); 38 | } 39 | other => { 40 | // Currently do not support statics. 41 | unimplemented!("Found non-function binding {:?}", other); 42 | } 43 | } 44 | } 45 | } 46 | retvl 47 | } 48 | 49 | /// Generates the function binding code. 50 | /// 51 | /// In the case of a dynamic library, this creates a struct called `LibraryWrapper` with a single field, `inner`, 52 | /// of type `libloading::Library`. Initially, it is also created with a constructor, 53 | /// `from_path(path : impl AsRef) -> Result`, to load a shard object at the given path. 54 | /// 55 | /// The functions are then transformed from raw `extern "C" fn name(inputs) -> outputs` 56 | /// into functions of the form `pub unsafe fn name(&self, inputs) -> Result`. 57 | /// 58 | /// The error case is for the case that `libloading` cannot find the symbol. 59 | fn construct_bindings(raw_bindings: Vec, should_dyn: bool) -> Vec { 60 | if !should_dyn { 61 | // Case static library: just wrap in an `extern "C"` block. 
62 | let mut wrapper: syn::ItemForeignMod = syn::parse_quote!( 63 | extern "C" {} 64 | ); 65 | for binding in raw_bindings { 66 | wrapper.items.push(syn::ForeignItem::Fn(binding)); 67 | } 68 | vec![syn::Item::ForeignMod(wrapper)] 69 | } else { 70 | // The struct declaration of the dynamic library wrapper 71 | let library_struct_wrapper: syn::Item = syn::parse_quote!( 72 | #[derive(Clone)] 73 | pub struct LibraryWrapper { 74 | inner: std::sync::Arc<libloading::Library>, 75 | } 76 | ); 77 | 78 | // The `impl` block, initialized with the constructor 79 | let mut impl_wrapper: syn::ItemImpl = syn::parse_quote!( 80 | impl LibraryWrapper { 81 | pub fn from_path(path : impl AsRef<std::ffi::OsStr>) -> Result<Self, libloading::Error> { 82 | let inner = std::sync::Arc::new(libloading::Library::new(&path)?); 83 | Ok(Self { inner }) 84 | } 85 | } 86 | ); 87 | 88 | for func in raw_bindings { 89 | let output_type = match func.sig.output { 90 | syn::ReturnType::Default => Box::new(syn::parse_quote!(())), 91 | syn::ReturnType::Type(_, inner) => inner, 92 | }; 93 | let inpt = func.sig.inputs; 94 | let name = func.sig.ident; 95 | let name_str = 96 | proc_macro2::Literal::byte_string(format!("{}\0", name).as_bytes()); 97 | 98 | let input_types = inpt 99 | .pairs() 100 | .filter_map(|pair| { 101 | let arg: &syn::FnArg = pair.value(); 102 | match arg { 103 | syn::FnArg::Receiver(_) => None, 104 | syn::FnArg::Typed(t) => Some(t.ty.to_owned()), 105 | } 106 | }) 107 | .collect::<syn::punctuated::Punctuated<_, syn::token::Comma>>(); 108 | let input_names = inpt 109 | .pairs() 110 | .filter_map(|pair| { 111 | let arg: &syn::FnArg = pair.value(); 112 | match arg { 113 | syn::FnArg::Receiver(_) => None, 114 | syn::FnArg::Typed(t) => Some(t.pat.to_owned()), 115 | } 116 | }) 117 | .collect::<syn::punctuated::Punctuated<_, syn::token::Comma>>(); 118 | let bd: syn::ImplItemMethod = syn::parse_quote!( 119 | pub unsafe fn #name (&self, #inpt) -> Result<#output_type, libloading::Error> { 120 | let dyn_symbol = self.inner.get::<unsafe extern fn(#input_types) -> #output_type>(#name_str)?; 121 | Ok( dyn_symbol(#input_names) ) 122 | } 123 | ); 124 | 
impl_wrapper.items.push(syn::ImplItem::Method(bd)); 125 | } 126 | vec![library_struct_wrapper, syn::Item::Impl(impl_wrapper)] 127 | } 128 | } 129 | 130 | fn write_to_file(path: impl AsRef<Path>, bindings: &bindgen::Bindings, dynamic: bool) { 131 | let mut file: syn::File = syn::parse_str(&bindings.to_string()).unwrap(); 132 | let externs = get_binding_functions(&mut file); 133 | let mapped = construct_bindings(externs, dynamic); 134 | file.items.extend(mapped); 135 | let output_text = add_spacings(file.to_token_stream()); 136 | std::fs::write(path, output_text.as_bytes()).unwrap(); 137 | } 138 | 139 | // Workaround for https://github.com/rust-lang/rust-bindgen/issues/1600 140 | fn add_spacings(tokens: TokenStream) -> String { 141 | let mut res_str = String::new(); 142 | struct TokenVisitor<'s> { 143 | s: &'s mut String, 144 | } 145 | impl<'s> TokenVisitor<'s> { 146 | fn visit_stream(&mut self, stream: TokenStream) { 147 | let mut iter = stream.into_iter().peekable(); 148 | while let Some(tree) = iter.next() { 149 | self.visit_tree(tree, iter.peek().is_some()); 150 | } 151 | } 152 | fn visit_tree(&mut self, tree: TokenTree, put_space: bool) { 153 | let mut put_space = put_space; 154 | match tree { 155 | TokenTree::Group(group) => match group.delimiter() { 156 | Delimiter::Brace => { 157 | *self.s += "{\n"; 158 | self.visit_stream(group.stream()); 159 | *self.s += "\n}\n"; 160 | } 161 | _ => *self.s += &group.to_string(), 162 | }, 163 | TokenTree::Ident(ident) => { 164 | *self.s += &ident.to_string(); 165 | } 166 | TokenTree::Punct(punct) => { 167 | if punct.spacing() == Spacing::Alone && punct.as_char() == ';' { 168 | *self.s += &punct.to_string(); 169 | *self.s += "\n"; 170 | } else { 171 | *self.s += &punct.to_string(); 172 | } 173 | if punct.spacing() == Spacing::Joint { 174 | put_space = false; 175 | } 176 | } 177 | TokenTree::Literal(l) => { 178 | *self.s += &l.to_string(); 179 | } 180 | } 181 | if put_space { 182 | *self.s += " "; 183 | } 184 | } 185 | } 
186 | let mut visitor = TokenVisitor { s: &mut res_str }; 187 | visitor.visit_stream(tokens); 188 | res_str 189 | } 190 | 191 | fn main() { 192 | let bindings = bindgen::Builder::default() 193 | .header("stddef.h") 194 | .header("sys/deepspeech/native_client/deepspeech.h") 195 | .clang_args(&["-x", "c++", "-std=c++11"]) 196 | .generate() 197 | .expect("Couldn't generate bindings"); 198 | 199 | let out_path = PathBuf::from("sys/src"); 200 | write_to_file(out_path.join("bindings.rs"), &bindings, false); 201 | write_to_file(PathBuf::from("src/dynamic_bindings.rs"), &bindings, true); 202 | } 203 | -------------------------------------------------------------------------------- /ci.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Fail on error 4 | set -e 5 | 6 | # Verbose execution 7 | set -v 8 | 9 | dir=$(dirname "${BASH_SOURCE[0]}") 10 | release="v$(cat $dir/sys/deepspeech/VERSION)" 11 | echo "release is $release" 12 | 13 | # Download the native client 14 | client_dir="$dir/target/native_client" 15 | rm -rf $client_dir || true 16 | mkdir -p $client_dir 17 | pushd $client_dir 18 | wget https://github.com/mozilla/DeepSpeech/releases/download/$release/native_client.amd64.cpu.linux.tar.xz 19 | tar xf native_client.* 20 | popd 21 | #export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}$client_dir" 22 | export LIBRARY_PATH="${LIBRARY_PATH:+${LIBRARY_PATH}:}$client_dir" 23 | cargo test --all 24 | -------------------------------------------------------------------------------- /examples/client.rs: -------------------------------------------------------------------------------- 1 | extern crate deepspeech; 2 | extern crate audrey; 3 | extern crate dasp_interpolate; 4 | extern crate dasp_signal; 5 | 6 | use std::path::Path; 7 | use std::env::args; 8 | use std::fs::File; 9 | use std::time::Instant; 10 | 11 | use deepspeech::Model; 12 | use audrey::read::Reader; 13 | use 
dasp_interpolate::linear::Linear; 14 | use dasp_signal::{from_iter, Signal, interpolate::Converter}; 15 | 16 | // The model has been trained on this specific 17 | // sample rate. 18 | const SAMPLE_RATE :u32 = 16_000; 19 | 20 | /* 21 | TODO list: 22 | * better resampling (right now it seems that recognition is impaired compared to manual resampling)... 23 | maybe use sinc? 24 | * channel cropping 25 | * use clap or something to parse the command line arguments 26 | */ 27 | fn main() { 28 | let start = Instant::now(); 29 | let model_dir_str = args().nth(1) 30 | .expect("Please specify model dir"); 31 | let audio_file_path = args().nth(2) 32 | .expect("Please specify an audio file to run STT on"); 33 | let dir_path = Path::new(&model_dir_str); 34 | let mut graph_name: Box<Path> = dir_path.join("output_graph.pb").into_boxed_path(); 35 | let mut scorer_name: Option<Box<Path>> = None; 36 | // search for model in model directory 37 | for file in dir_path.read_dir().expect("Specified model dir is not a dir") { 38 | if let Ok(f) = file { 39 | let file_path = f.path(); 40 | if file_path.is_file() { 41 | if let Some(ext) = file_path.extension() { 42 | if ext == "pb" || ext == "pbmm" || ext == "tflite" { 43 | graph_name = file_path.into_boxed_path(); 44 | } else if ext == "scorer" { 45 | scorer_name = Some(file_path.into_boxed_path()); 46 | } 47 | } 48 | } 49 | } 50 | } 51 | let mut m = Model::load_from_files(&graph_name).unwrap(); 52 | // enable external scorer if found in the model folder 53 | if let Some(scorer) = scorer_name { 54 | println!("Using external scorer `{}`", scorer.display()); 55 | m.enable_external_scorer(&scorer).unwrap(); 56 | } 57 | 58 | let initialized_time = Instant::now(); 59 | println!("Model initialized in {:?}.", initialized_time - start); 60 | 61 | let audio_file = File::open(audio_file_path).unwrap(); 62 | let mut reader = Reader::new(audio_file).unwrap(); 63 | let desc = reader.description(); 64 | assert_eq!(1, desc.channel_count(), 65 | "The channel count
is required to be one, at least for now"); 66 | 67 | // Obtain the buffer of samples 68 | let audio_buf :Vec<_> = if desc.sample_rate() == SAMPLE_RATE { 69 | reader.samples().map(|s| s.unwrap()).collect() 70 | } else { 71 | // We need to interpolate to the target sample rate 72 | let interpolator = Linear::new([0i16], [0]); 73 | let conv = Converter::from_hz_to_hz( 74 | from_iter(reader.samples::<i16>().map(|s| [s.unwrap()])), 75 | interpolator, 76 | desc.sample_rate() as f64, 77 | SAMPLE_RATE as f64); 78 | conv.until_exhausted().map(|v| v[0]).collect() 79 | }; 80 | 81 | let len_seconds = audio_buf.len() as f64 / SAMPLE_RATE as f64; 82 | 83 | let decoded_time = Instant::now(); 84 | 85 | println!("Decoding done in {:?}. Sample length {}s. Running STT.", 86 | decoded_time - initialized_time, len_seconds); 87 | 88 | // Run the speech to text algorithm 89 | let result = m.speech_to_text(&audio_buf).unwrap(); 90 | 91 | let text_time = Instant::now(); 92 | 93 | let elapsed = text_time - decoded_time; 94 | 95 | let elapsed_f = elapsed.subsec_micros() as f64 / 1_000_000.0 96 | + elapsed.as_secs() as f64; 97 | println!("STT done in {:?}. Real time factor {:.5}", elapsed, elapsed_f / len_seconds); 98 | 99 | // Output the result 100 | println!("{}", result); 101 | 102 | } 103 | -------------------------------------------------------------------------------- /examples/client_extended.rs: -------------------------------------------------------------------------------- 1 | extern crate deepspeech; 2 | extern crate audrey; 3 | extern crate dasp_interpolate; 4 | extern crate dasp_signal; 5 | 6 | use std::path::Path; 7 | use std::env::args; 8 | use std::fs::File; 9 | 10 | use deepspeech::Model; 11 | use deepspeech::CandidateTranscript; 12 | use audrey::read::Reader; 13 | use dasp_interpolate::linear::Linear; 14 | use dasp_signal::{from_iter, Signal, interpolate::Converter}; 15 | 16 | // The model has been trained on this specific 17 | // sample rate. 
18 | const SAMPLE_RATE :u32 = 16_000; 19 | 20 | fn transcript_to_string(tr :&CandidateTranscript) -> String { 21 | let mut s = String::new(); 22 | for token in tr.tokens() { 23 | s += token.text().unwrap(); 24 | } 25 | s 26 | } 27 | 28 | /* 29 | TODO list: 30 | * better resampling (right now it seems that recognition is impaired compared to manual resampling)... 31 | maybe use sinc? 32 | * channel cropping 33 | * use clap or something to parse the command line arguments 34 | */ 35 | fn main() { 36 | let model_dir_str = args().nth(1) 37 | .expect("Please specify model dir"); 38 | let audio_file_path = args().nth(2) 39 | .expect("Please specify an audio file to run STT on"); 40 | let dir_path = Path::new(&model_dir_str); 41 | let mut graph_name: Box<Path> = dir_path.join("output_graph.pb").into_boxed_path(); 42 | let mut scorer_name: Option<Box<Path>> = None; 43 | // search for model in model directory 44 | for file in dir_path.read_dir().expect("Specified model dir is not a dir") { 45 | if let Ok(f) = file { 46 | let file_path = f.path(); 47 | if file_path.is_file() { 48 | if let Some(ext) = file_path.extension() { 49 | if ext == "pb" || ext == "pbmm" || ext == "tflite" { 50 | graph_name = file_path.into_boxed_path(); 51 | } else if ext == "scorer" { 52 | scorer_name = Some(file_path.into_boxed_path()); 53 | } 54 | } 55 | } 56 | } 57 | } 58 | let mut m = Model::load_from_files(&graph_name).unwrap(); 59 | // enable external scorer if found in the model folder 60 | if let Some(scorer) = scorer_name { 61 | println!("Using external scorer `{}`", scorer.display()); 62 | m.enable_external_scorer(&scorer).unwrap(); 63 | } 64 | 65 | let audio_file = File::open(audio_file_path).unwrap(); 66 | let mut reader = Reader::new(audio_file).unwrap(); 67 | let desc = reader.description(); 68 | assert_eq!(1, desc.channel_count(), 69 | "The channel count is required to be one, at least for now"); 70 | 71 | // Obtain the buffer of samples 72 | let audio_buf :Vec<_> = if desc.sample_rate() == 
SAMPLE_RATE { 73 | reader.samples().map(|s| s.unwrap()).collect() 74 | } else { 75 | // We need to interpolate to the target sample rate 76 | let interpolator = Linear::new([0i16], [0]); 77 | let conv = Converter::from_hz_to_hz( 78 | from_iter(reader.samples::<i16>().map(|s| [s.unwrap()])), 79 | interpolator, 80 | desc.sample_rate() as f64, 81 | SAMPLE_RATE as f64); 82 | conv.until_exhausted().map(|v| v[0]).collect() 83 | }; 84 | 85 | // Run the speech to text algorithm 86 | let metadata = m.speech_to_text_with_metadata(&audio_buf, 1).unwrap(); 87 | let result = transcript_to_string(&metadata.transcripts()[0]); 88 | 89 | // Output the result 90 | println!("{}", result); 91 | } 92 | -------------------------------------------------------------------------------- /examples/client_simple.rs: -------------------------------------------------------------------------------- 1 | extern crate deepspeech; 2 | extern crate audrey; 3 | extern crate dasp_interpolate; 4 | extern crate dasp_signal; 5 | 6 | use std::path::Path; 7 | use std::env::args; 8 | use std::fs::File; 9 | 10 | use deepspeech::Model; 11 | use audrey::read::Reader; 12 | use dasp_interpolate::linear::Linear; 13 | use dasp_signal::{from_iter, Signal, interpolate::Converter}; 14 | 15 | // The model has been trained on this specific 16 | // sample rate. 17 | const SAMPLE_RATE :u32 = 16_000; 18 | 19 | /* 20 | TODO list: 21 | * better resampling (right now it seems that recognition is impaired compared to manual resampling)... 22 | maybe use sinc? 
23 | * channel cropping 24 | * use clap or something to parse the command line arguments 25 | */ 26 | fn main() { 27 | let model_dir_str = args().nth(1) 28 | .expect("Please specify model dir"); 29 | let audio_file_path = args().nth(2) 30 | .expect("Please specify an audio file to run STT on"); 31 | let dir_path = Path::new(&model_dir_str); 32 | let mut graph_name: Box<Path> = dir_path.join("output_graph.pb").into_boxed_path(); 33 | let mut scorer_name: Option<Box<Path>> = None; 34 | // search for model in model directory 35 | for file in dir_path.read_dir().expect("Specified model dir is not a dir") { 36 | if let Ok(f) = file { 37 | let file_path = f.path(); 38 | if file_path.is_file() { 39 | if let Some(ext) = file_path.extension() { 40 | if ext == "pb" || ext == "pbmm" || ext == "tflite" { 41 | graph_name = file_path.into_boxed_path(); 42 | } else if ext == "scorer" { 43 | scorer_name = Some(file_path.into_boxed_path()); 44 | } 45 | } 46 | } 47 | } 48 | } 49 | let mut m = Model::load_from_files(&graph_name).unwrap(); 50 | // enable external scorer if found in the model folder 51 | if let Some(scorer) = scorer_name { 52 | println!("Using external scorer `{}`", scorer.display()); 53 | m.enable_external_scorer(&scorer).unwrap(); 54 | } 55 | 56 | let audio_file = File::open(audio_file_path).unwrap(); 57 | let mut reader = Reader::new(audio_file).unwrap(); 58 | let desc = reader.description(); 59 | assert_eq!(1, desc.channel_count(), 60 | "The channel count is required to be one, at least for now"); 61 | 62 | // Obtain the buffer of samples 63 | let audio_buf :Vec<_> = if desc.sample_rate() == SAMPLE_RATE { 64 | reader.samples().map(|s| s.unwrap()).collect() 65 | } else { 66 | // We need to interpolate to the target sample rate 67 | let interpolator = Linear::new([0i16], [0]); 68 | let conv = Converter::from_hz_to_hz( 69 | from_iter(reader.samples::<i16>().map(|s| [s.unwrap()])), 70 | interpolator, 71 | desc.sample_rate() as f64, 72 | SAMPLE_RATE as f64); 73 | 
conv.until_exhausted().map(|v| v[0]).collect() 74 | }; 75 | 76 | // Run the speech to text algorithm 77 | let result = m.speech_to_text(&audio_buf).unwrap(); 78 | 79 | // Output the result 80 | println!("{}", result); 81 | } 82 | -------------------------------------------------------------------------------- /src/dynamic_bindings.rs: -------------------------------------------------------------------------------- 1 | pub type size_t = :: std :: os :: raw :: c_ulong ; 2 | # [repr (C)] # [repr (align (16))] # [derive (Debug , Copy , Clone)] pub struct max_align_t { 3 | pub __clang_max_align_nonce1 : :: std :: os :: raw :: c_longlong , pub __bindgen_padding_0 : u64 , pub __clang_max_align_nonce2 : u128 , 4 | } 5 | # [test] fn bindgen_test_layout_max_align_t () { 6 | assert_eq ! (:: std :: mem :: size_of ::< max_align_t > () , 32usize , concat ! ("Size of: " , stringify ! (max_align_t))) ; 7 | assert_eq ! (:: std :: mem :: align_of ::< max_align_t > () , 16usize , concat ! ("Alignment of " , stringify ! (max_align_t))) ; 8 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< max_align_t > ())) . __clang_max_align_nonce1 as * const _ as usize } , 0usize , concat ! ("Offset of field: " , stringify ! (max_align_t) , "::" , stringify ! (__clang_max_align_nonce1))) ; 9 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< max_align_t > ())) . __clang_max_align_nonce2 as * const _ as usize } , 16usize , concat ! ("Offset of field: " , stringify ! (max_align_t) , "::" , stringify ! 
(__clang_max_align_nonce2))) ; 10 | 11 | } 12 | # [repr (C)] # [derive (Debug , Copy , Clone)] pub struct ModelState { 13 | _unused : [u8 ; 0] , 14 | } 15 | # [repr (C)] # [derive (Debug , Copy , Clone)] pub struct StreamingState { 16 | _unused : [u8 ; 0] , 17 | } 18 | # [doc = " @brief Stores text of an individual token, along with its timing information"] # [repr (C)] # [derive (Debug , Copy , Clone)] pub struct TokenMetadata { 19 | # [doc = " The text corresponding to this token"] pub text : * const :: std :: os :: raw :: c_char , # [doc = " Position of the token in units of 20ms"] pub timestep : :: std :: os :: raw :: c_uint , # [doc = " Position of the token in seconds"] pub start_time : f32 , 20 | } 21 | # [test] fn bindgen_test_layout_TokenMetadata () { 22 | assert_eq ! (:: std :: mem :: size_of ::< TokenMetadata > () , 16usize , concat ! ("Size of: " , stringify ! (TokenMetadata))) ; 23 | assert_eq ! (:: std :: mem :: align_of ::< TokenMetadata > () , 8usize , concat ! ("Alignment of " , stringify ! (TokenMetadata))) ; 24 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< TokenMetadata > ())) . text as * const _ as usize } , 0usize , concat ! ("Offset of field: " , stringify ! (TokenMetadata) , "::" , stringify ! (text))) ; 25 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< TokenMetadata > ())) . timestep as * const _ as usize } , 8usize , concat ! ("Offset of field: " , stringify ! (TokenMetadata) , "::" , stringify ! (timestep))) ; 26 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< TokenMetadata > ())) . start_time as * const _ as usize } , 12usize , concat ! ("Offset of field: " , stringify ! (TokenMetadata) , "::" , stringify ! 
(start_time))) ; 27 | 28 | } 29 | # [doc = " @brief A single transcript computed by the model, including a confidence"] # [doc = " value and the metadata for its constituent tokens."] # [repr (C)] # [derive (Debug , Copy , Clone)] pub struct CandidateTranscript { 30 | # [doc = " Array of TokenMetadata objects"] pub tokens : * const TokenMetadata , # [doc = " Size of the tokens array"] pub num_tokens : :: std :: os :: raw :: c_uint , # [doc = " Approximated confidence value for this transcript. This is roughly the"] # [doc = " sum of the acoustic model logit values for each timestep/character that"] # [doc = " contributed to the creation of this transcript."] pub confidence : f64 , 31 | } 32 | # [test] fn bindgen_test_layout_CandidateTranscript () { 33 | assert_eq ! (:: std :: mem :: size_of ::< CandidateTranscript > () , 24usize , concat ! ("Size of: " , stringify ! (CandidateTranscript))) ; 34 | assert_eq ! (:: std :: mem :: align_of ::< CandidateTranscript > () , 8usize , concat ! ("Alignment of " , stringify ! (CandidateTranscript))) ; 35 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< CandidateTranscript > ())) . tokens as * const _ as usize } , 0usize , concat ! ("Offset of field: " , stringify ! (CandidateTranscript) , "::" , stringify ! (tokens))) ; 36 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< CandidateTranscript > ())) . num_tokens as * const _ as usize } , 8usize , concat ! ("Offset of field: " , stringify ! (CandidateTranscript) , "::" , stringify ! (num_tokens))) ; 37 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< CandidateTranscript > ())) . confidence as * const _ as usize } , 16usize , concat ! ("Offset of field: " , stringify ! (CandidateTranscript) , "::" , stringify ! 
(confidence))) ; 38 | 39 | } 40 | # [doc = " @brief An array of CandidateTranscript objects computed by the model."] # [repr (C)] # [derive (Debug , Copy , Clone)] pub struct Metadata { 41 | # [doc = " Array of CandidateTranscript objects"] pub transcripts : * const CandidateTranscript , # [doc = " Size of the transcripts array"] pub num_transcripts : :: std :: os :: raw :: c_uint , 42 | } 43 | # [test] fn bindgen_test_layout_Metadata () { 44 | assert_eq ! (:: std :: mem :: size_of ::< Metadata > () , 16usize , concat ! ("Size of: " , stringify ! (Metadata))) ; 45 | assert_eq ! (:: std :: mem :: align_of ::< Metadata > () , 8usize , concat ! ("Alignment of " , stringify ! (Metadata))) ; 46 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< Metadata > ())) . transcripts as * const _ as usize } , 0usize , concat ! ("Offset of field: " , stringify ! (Metadata) , "::" , stringify ! (transcripts))) ; 47 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< Metadata > ())) . num_transcripts as * const _ as usize } , 8usize , concat ! ("Offset of field: " , stringify ! (Metadata) , "::" , stringify ! 
(num_transcripts))) ; 48 | 49 | } 50 | pub const DeepSpeech_Error_Codes_DS_ERR_OK : DeepSpeech_Error_Codes = 0 ; 51 | pub const DeepSpeech_Error_Codes_DS_ERR_NO_MODEL : DeepSpeech_Error_Codes = 4096 ; 52 | pub const DeepSpeech_Error_Codes_DS_ERR_INVALID_ALPHABET : DeepSpeech_Error_Codes = 8192 ; 53 | pub const DeepSpeech_Error_Codes_DS_ERR_INVALID_SHAPE : DeepSpeech_Error_Codes = 8193 ; 54 | pub const DeepSpeech_Error_Codes_DS_ERR_INVALID_SCORER : DeepSpeech_Error_Codes = 8194 ; 55 | pub const DeepSpeech_Error_Codes_DS_ERR_MODEL_INCOMPATIBLE : DeepSpeech_Error_Codes = 8195 ; 56 | pub const DeepSpeech_Error_Codes_DS_ERR_SCORER_NOT_ENABLED : DeepSpeech_Error_Codes = 8196 ; 57 | pub const DeepSpeech_Error_Codes_DS_ERR_SCORER_UNREADABLE : DeepSpeech_Error_Codes = 8197 ; 58 | pub const DeepSpeech_Error_Codes_DS_ERR_SCORER_INVALID_LM : DeepSpeech_Error_Codes = 8198 ; 59 | pub const DeepSpeech_Error_Codes_DS_ERR_SCORER_NO_TRIE : DeepSpeech_Error_Codes = 8199 ; 60 | pub const DeepSpeech_Error_Codes_DS_ERR_SCORER_INVALID_TRIE : DeepSpeech_Error_Codes = 8200 ; 61 | pub const DeepSpeech_Error_Codes_DS_ERR_SCORER_VERSION_MISMATCH : DeepSpeech_Error_Codes = 8201 ; 62 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_INIT_MMAP : DeepSpeech_Error_Codes = 12288 ; 63 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_INIT_SESS : DeepSpeech_Error_Codes = 12289 ; 64 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_INTERPRETER : DeepSpeech_Error_Codes = 12290 ; 65 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_RUN_SESS : DeepSpeech_Error_Codes = 12291 ; 66 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_STREAM : DeepSpeech_Error_Codes = 12292 ; 67 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_READ_PROTOBUF : DeepSpeech_Error_Codes = 12293 ; 68 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_SESS : DeepSpeech_Error_Codes = 12294 ; 69 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_MODEL : DeepSpeech_Error_Codes = 12295 ; 70 | pub const 
DeepSpeech_Error_Codes_DS_ERR_FAIL_INSERT_HOTWORD : DeepSpeech_Error_Codes = 12296 ; 71 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_CLEAR_HOTWORD : DeepSpeech_Error_Codes = 12297 ; 72 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_ERASE_HOTWORD : DeepSpeech_Error_Codes = 12304 ; 73 | pub type DeepSpeech_Error_Codes = :: std :: os :: raw :: c_uint ; 74 | # [derive (Clone)] pub struct LibraryWrapper { 75 | inner : std :: sync :: Arc < libloading :: Library > , 76 | } 77 | impl LibraryWrapper { 78 | pub fn from_path (path : impl AsRef < std :: ffi :: OsStr >) -> Result < Self , libloading :: Error > { 79 | let inner = std :: sync :: Arc :: new (libloading :: Library :: new (& path) ?) ; 80 | Ok (Self { inner }) 81 | } 82 | pub unsafe fn DS_ErrorCodeToErrorMessage (& self , aErrorCode : :: std :: os :: raw :: c_int ,) -> Result < * mut :: std :: os :: raw :: c_char , libloading :: Error > { 83 | let dyn_symbol = self . inner . get :: < unsafe extern fn (:: std :: os :: raw :: c_int) -> * mut :: std :: os :: raw :: c_char > (b"DS_ErrorCodeToErrorMessage\0") ? ; 84 | Ok (dyn_symbol (aErrorCode)) 85 | } 86 | pub unsafe fn DS_Version (& self ,) -> Result < * mut :: std :: os :: raw :: c_char , libloading :: Error > { 87 | let dyn_symbol = self . inner . get :: < unsafe extern fn () -> * mut :: std :: os :: raw :: c_char > (b"DS_Version\0") ? ; 88 | Ok (dyn_symbol ()) 89 | } 90 | pub unsafe fn DS_FreeString (& self , str_ : * mut :: std :: os :: raw :: c_char) -> Result < () , libloading :: Error > { 91 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut :: std :: os :: raw :: c_char) -> () > (b"DS_FreeString\0") ? ; 92 | Ok (dyn_symbol (str_)) 93 | } 94 | pub unsafe fn DS_FreeMetadata (& self , m : * mut Metadata) -> Result < () , libloading :: Error > { 95 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut Metadata) -> () > (b"DS_FreeMetadata\0") ? 
; 96 | Ok (dyn_symbol (m)) 97 | } 98 | pub unsafe fn DS_FreeStream (& self , aSctx : * mut StreamingState) -> Result < () , libloading :: Error > { 99 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut StreamingState) -> () > (b"DS_FreeStream\0") ? ; 100 | Ok (dyn_symbol (aSctx)) 101 | } 102 | pub unsafe fn DS_FinishStreamWithMetadata (& self , aSctx : * mut StreamingState , aNumResults : :: std :: os :: raw :: c_uint ,) -> Result < * mut Metadata , libloading :: Error > { 103 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut StreamingState , :: std :: os :: raw :: c_uint) -> * mut Metadata > (b"DS_FinishStreamWithMetadata\0") ? ; 104 | Ok (dyn_symbol (aSctx , aNumResults)) 105 | } 106 | pub unsafe fn DS_FinishStream (& self , aSctx : * mut StreamingState) -> Result < * mut :: std :: os :: raw :: c_char , libloading :: Error > { 107 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut StreamingState) -> * mut :: std :: os :: raw :: c_char > (b"DS_FinishStream\0") ? ; 108 | Ok (dyn_symbol (aSctx)) 109 | } 110 | pub unsafe fn DS_IntermediateDecodeWithMetadata (& self , aSctx : * const StreamingState , aNumResults : :: std :: os :: raw :: c_uint ,) -> Result < * mut Metadata , libloading :: Error > { 111 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* const StreamingState , :: std :: os :: raw :: c_uint) -> * mut Metadata > (b"DS_IntermediateDecodeWithMetadata\0") ? ; 112 | Ok (dyn_symbol (aSctx , aNumResults)) 113 | } 114 | pub unsafe fn DS_IntermediateDecode (& self , aSctx : * const StreamingState) -> Result < * mut :: std :: os :: raw :: c_char , libloading :: Error > { 115 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* const StreamingState) -> * mut :: std :: os :: raw :: c_char > (b"DS_IntermediateDecode\0") ? 
; 116 | Ok (dyn_symbol (aSctx)) 117 | } 118 | pub unsafe fn DS_FeedAudioContent (& self , aSctx : * mut StreamingState , aBuffer : * const :: std :: os :: raw :: c_short , aBufferSize : :: std :: os :: raw :: c_uint ,) -> Result < () , libloading :: Error > { 119 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut StreamingState , * const :: std :: os :: raw :: c_short , :: std :: os :: raw :: c_uint) -> () > (b"DS_FeedAudioContent\0") ? ; 120 | Ok (dyn_symbol (aSctx , aBuffer , aBufferSize)) 121 | } 122 | pub unsafe fn DS_CreateStream (& self , aCtx : * mut ModelState , retval : * mut * mut StreamingState ,) -> Result < :: std :: os :: raw :: c_int , libloading :: Error > { 123 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut ModelState , * mut * mut StreamingState) -> :: std :: os :: raw :: c_int > (b"DS_CreateStream\0") ? ; 124 | Ok (dyn_symbol (aCtx , retval)) 125 | } 126 | pub unsafe fn DS_SpeechToTextWithMetadata (& self , aCtx : * mut ModelState , aBuffer : * const :: std :: os :: raw :: c_short , aBufferSize : :: std :: os :: raw :: c_uint , aNumResults : :: std :: os :: raw :: c_uint ,) -> Result < * mut Metadata , libloading :: Error > { 127 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut ModelState , * const :: std :: os :: raw :: c_short , :: std :: os :: raw :: c_uint , :: std :: os :: raw :: c_uint) -> * mut Metadata > (b"DS_SpeechToTextWithMetadata\0") ? ; 128 | Ok (dyn_symbol (aCtx , aBuffer , aBufferSize , aNumResults)) 129 | } 130 | pub unsafe fn DS_SpeechToText (& self , aCtx : * mut ModelState , aBuffer : * const :: std :: os :: raw :: c_short , aBufferSize : :: std :: os :: raw :: c_uint ,) -> Result < * mut :: std :: os :: raw :: c_char , libloading :: Error > { 131 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut ModelState , * const :: std :: os :: raw :: c_short , :: std :: os :: raw :: c_uint) -> * mut :: std :: os :: raw :: c_char > (b"DS_SpeechToText\0") ? 
; 132 | Ok (dyn_symbol (aCtx , aBuffer , aBufferSize)) 133 | } 134 | pub unsafe fn DS_SetScorerAlphaBeta (& self , aCtx : * mut ModelState , aAlpha : f32 , aBeta : f32 ,) -> Result < :: std :: os :: raw :: c_int , libloading :: Error > { 135 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut ModelState , f32 , f32) -> :: std :: os :: raw :: c_int > (b"DS_SetScorerAlphaBeta\0") ? ; 136 | Ok (dyn_symbol (aCtx , aAlpha , aBeta)) 137 | } 138 | pub unsafe fn DS_DisableExternalScorer (& self , aCtx : * mut ModelState) -> Result < :: std :: os :: raw :: c_int , libloading :: Error > { 139 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut ModelState) -> :: std :: os :: raw :: c_int > (b"DS_DisableExternalScorer\0") ? ; 140 | Ok (dyn_symbol (aCtx)) 141 | } 142 | pub unsafe fn DS_ClearHotWords (& self , aCtx : * mut ModelState) -> Result < :: std :: os :: raw :: c_int , libloading :: Error > { 143 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut ModelState) -> :: std :: os :: raw :: c_int > (b"DS_ClearHotWords\0") ? ; 144 | Ok (dyn_symbol (aCtx)) 145 | } 146 | pub unsafe fn DS_EraseHotWord (& self , aCtx : * mut ModelState , word : * const :: std :: os :: raw :: c_char ,) -> Result < :: std :: os :: raw :: c_int , libloading :: Error > { 147 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut ModelState , * const :: std :: os :: raw :: c_char) -> :: std :: os :: raw :: c_int > (b"DS_EraseHotWord\0") ? ; 148 | Ok (dyn_symbol (aCtx , word)) 149 | } 150 | pub unsafe fn DS_AddHotWord (& self , aCtx : * mut ModelState , word : * const :: std :: os :: raw :: c_char , boost : f32 ,) -> Result < :: std :: os :: raw :: c_int , libloading :: Error > { 151 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut ModelState , * const :: std :: os :: raw :: c_char , f32) -> :: std :: os :: raw :: c_int > (b"DS_AddHotWord\0") ? 
; 152 | Ok (dyn_symbol (aCtx , word , boost)) 153 | } 154 | pub unsafe fn DS_EnableExternalScorer (& self , aCtx : * mut ModelState , aScorerPath : * const :: std :: os :: raw :: c_char ,) -> Result < :: std :: os :: raw :: c_int , libloading :: Error > { 155 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut ModelState , * const :: std :: os :: raw :: c_char) -> :: std :: os :: raw :: c_int > (b"DS_EnableExternalScorer\0") ? ; 156 | Ok (dyn_symbol (aCtx , aScorerPath)) 157 | } 158 | pub unsafe fn DS_FreeModel (& self , ctx : * mut ModelState) -> Result < () , libloading :: Error > { 159 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut ModelState) -> () > (b"DS_FreeModel\0") ? ; 160 | Ok (dyn_symbol (ctx)) 161 | } 162 | pub unsafe fn DS_GetModelSampleRate (& self , aCtx : * const ModelState) -> Result < :: std :: os :: raw :: c_int , libloading :: Error > { 163 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* const ModelState) -> :: std :: os :: raw :: c_int > (b"DS_GetModelSampleRate\0") ? ; 164 | Ok (dyn_symbol (aCtx)) 165 | } 166 | pub unsafe fn DS_SetModelBeamWidth (& self , aCtx : * mut ModelState , aBeamWidth : :: std :: os :: raw :: c_uint ,) -> Result < :: std :: os :: raw :: c_int , libloading :: Error > { 167 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* mut ModelState , :: std :: os :: raw :: c_uint) -> :: std :: os :: raw :: c_int > (b"DS_SetModelBeamWidth\0") ? ; 168 | Ok (dyn_symbol (aCtx , aBeamWidth)) 169 | } 170 | pub unsafe fn DS_GetModelBeamWidth (& self , aCtx : * const ModelState) -> Result < :: std :: os :: raw :: c_uint , libloading :: Error > { 171 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* const ModelState) -> :: std :: os :: raw :: c_uint > (b"DS_GetModelBeamWidth\0") ? 
; 172 | Ok (dyn_symbol (aCtx)) 173 | } 174 | pub unsafe fn DS_CreateModel (& self , aModelPath : * const :: std :: os :: raw :: c_char , retval : * mut * mut ModelState ,) -> Result < :: std :: os :: raw :: c_int , libloading :: Error > { 175 | let dyn_symbol = self . inner . get :: < unsafe extern fn (* const :: std :: os :: raw :: c_char , * mut * mut ModelState) -> :: std :: os :: raw :: c_int > (b"DS_CreateModel\0") ? ; 176 | Ok (dyn_symbol (aModelPath , retval)) 177 | } 178 | 179 | } 180 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "static_bindings")] 2 | use deepspeech_sys as ds; 3 | 4 | #[cfg(all(feature = "dynamic", not(feature = "static_bindings")))] 5 | use crate::dynamic_bindings as ds; 6 | 7 | use self::ds::{ 8 | DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_MODEL, 9 | DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_SESS, 10 | DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_STREAM, DeepSpeech_Error_Codes_DS_ERR_FAIL_INIT_MMAP, 11 | DeepSpeech_Error_Codes_DS_ERR_FAIL_INIT_SESS, DeepSpeech_Error_Codes_DS_ERR_FAIL_INTERPRETER, 12 | DeepSpeech_Error_Codes_DS_ERR_FAIL_READ_PROTOBUF, DeepSpeech_Error_Codes_DS_ERR_FAIL_RUN_SESS, 13 | DeepSpeech_Error_Codes_DS_ERR_INVALID_ALPHABET, DeepSpeech_Error_Codes_DS_ERR_INVALID_SCORER, 14 | DeepSpeech_Error_Codes_DS_ERR_INVALID_SHAPE, DeepSpeech_Error_Codes_DS_ERR_MODEL_INCOMPATIBLE, 15 | DeepSpeech_Error_Codes_DS_ERR_NO_MODEL, DeepSpeech_Error_Codes_DS_ERR_SCORER_INVALID_LM, 16 | DeepSpeech_Error_Codes_DS_ERR_SCORER_INVALID_TRIE, 17 | DeepSpeech_Error_Codes_DS_ERR_SCORER_NOT_ENABLED, DeepSpeech_Error_Codes_DS_ERR_SCORER_NO_TRIE, 18 | DeepSpeech_Error_Codes_DS_ERR_SCORER_UNREADABLE, 19 | DeepSpeech_Error_Codes_DS_ERR_SCORER_VERSION_MISMATCH, 20 | }; 21 | 22 | use std::error::Error; 23 | use std::fmt::{Display, Formatter}; 24 | use std::str::Utf8Error; 25 | 26 | /// An error code 
returned from `libdeepspeech` itself. 27 | #[repr(u32)] 28 | #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Debug)] 29 | #[non_exhaustive] 30 | pub enum LibraryError { 31 | NoModel = DeepSpeech_Error_Codes_DS_ERR_NO_MODEL, 32 | InvalidAlphabet = DeepSpeech_Error_Codes_DS_ERR_INVALID_ALPHABET, 33 | InvalidShape = DeepSpeech_Error_Codes_DS_ERR_INVALID_SHAPE, 34 | InvalidScorer = DeepSpeech_Error_Codes_DS_ERR_INVALID_SCORER, 35 | ModelIncompatible = DeepSpeech_Error_Codes_DS_ERR_MODEL_INCOMPATIBLE, 36 | ScorerNotEnabled = DeepSpeech_Error_Codes_DS_ERR_SCORER_NOT_ENABLED, 37 | ScorerUnreadable = DeepSpeech_Error_Codes_DS_ERR_SCORER_UNREADABLE, 38 | ScorerInvalidLm = DeepSpeech_Error_Codes_DS_ERR_SCORER_INVALID_LM, 39 | ScorerNoTrie = DeepSpeech_Error_Codes_DS_ERR_SCORER_NO_TRIE, 40 | ScorerInvalidTrie = DeepSpeech_Error_Codes_DS_ERR_SCORER_INVALID_TRIE, 41 | ScorerVersionMismatch = DeepSpeech_Error_Codes_DS_ERR_SCORER_VERSION_MISMATCH, 42 | FailInitMmap = DeepSpeech_Error_Codes_DS_ERR_FAIL_INIT_MMAP, 43 | FailInitSess = DeepSpeech_Error_Codes_DS_ERR_FAIL_INIT_SESS, 44 | FailInterpreter = DeepSpeech_Error_Codes_DS_ERR_FAIL_INTERPRETER, 45 | FailRunSess = DeepSpeech_Error_Codes_DS_ERR_FAIL_RUN_SESS, 46 | FailCreateStream = DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_STREAM, 47 | FailReadProtobuf = DeepSpeech_Error_Codes_DS_ERR_FAIL_READ_PROTOBUF, 48 | FailCreateSess = DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_SESS, 49 | FailCreateModel = DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_MODEL, 50 | } 51 | 52 | impl LibraryError { 53 | /// Attempts to convert a raw error flag into a known error code. 54 | /// Returns `None` if `err_code` does not match a known error code value. 
55 | pub fn from_code(err_code: ds::DeepSpeech_Error_Codes) -> Option { 56 | match err_code { 57 | ds::DeepSpeech_Error_Codes_DS_ERR_NO_MODEL => Some(LibraryError::NoModel), 58 | ds::DeepSpeech_Error_Codes_DS_ERR_INVALID_ALPHABET => { 59 | Some(LibraryError::InvalidAlphabet) 60 | } 61 | ds::DeepSpeech_Error_Codes_DS_ERR_INVALID_SHAPE => Some(LibraryError::InvalidShape), 62 | ds::DeepSpeech_Error_Codes_DS_ERR_INVALID_SCORER => Some(LibraryError::InvalidScorer), 63 | ds::DeepSpeech_Error_Codes_DS_ERR_MODEL_INCOMPATIBLE => { 64 | Some(LibraryError::ModelIncompatible) 65 | } 66 | ds::DeepSpeech_Error_Codes_DS_ERR_SCORER_NOT_ENABLED => { 67 | Some(LibraryError::ScorerNotEnabled) 68 | } 69 | ds::DeepSpeech_Error_Codes_DS_ERR_SCORER_UNREADABLE => { 70 | Some(LibraryError::ScorerUnreadable) 71 | } 72 | ds::DeepSpeech_Error_Codes_DS_ERR_SCORER_INVALID_LM => { 73 | Some(LibraryError::ScorerInvalidLm) 74 | } 75 | ds::DeepSpeech_Error_Codes_DS_ERR_SCORER_NO_TRIE => Some(LibraryError::ScorerNoTrie), 76 | ds::DeepSpeech_Error_Codes_DS_ERR_SCORER_INVALID_TRIE => { 77 | Some(LibraryError::ScorerInvalidTrie) 78 | } 79 | ds::DeepSpeech_Error_Codes_DS_ERR_SCORER_VERSION_MISMATCH => { 80 | Some(LibraryError::ScorerVersionMismatch) 81 | } 82 | ds::DeepSpeech_Error_Codes_DS_ERR_FAIL_INIT_MMAP => Some(LibraryError::FailInitMmap), 83 | ds::DeepSpeech_Error_Codes_DS_ERR_FAIL_INIT_SESS => Some(LibraryError::FailInitSess), 84 | ds::DeepSpeech_Error_Codes_DS_ERR_FAIL_INTERPRETER => { 85 | Some(LibraryError::FailInterpreter) 86 | } 87 | ds::DeepSpeech_Error_Codes_DS_ERR_FAIL_RUN_SESS => Some(LibraryError::FailRunSess), 88 | ds::DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_STREAM => { 89 | Some(LibraryError::FailCreateStream) 90 | } 91 | ds::DeepSpeech_Error_Codes_DS_ERR_FAIL_READ_PROTOBUF => { 92 | Some(LibraryError::FailReadProtobuf) 93 | } 94 | ds::DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_SESS => { 95 | Some(LibraryError::FailCreateSess) 96 | } 97 | 
ds::DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_MODEL => { 98 | Some(LibraryError::FailCreateModel) 99 | } 100 | _ => None, 101 | } 102 | } 103 | 104 | /// Converts this item into its error flag number. 105 | pub fn as_code(self) -> u32 { 106 | self as u32 107 | } 108 | } 109 | 110 | impl From for u32 { 111 | fn from(outer: LibraryError) -> Self { 112 | outer.as_code() 113 | } 114 | } 115 | 116 | impl Display for LibraryError { 117 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 118 | // Taken from the `DS_FOR_EACH_ERROR` macro in deepspeech/native_client/deepspeech.h 119 | let msg = match self { 120 | LibraryError::NoModel => "Missing model information.", 121 | LibraryError::InvalidAlphabet => { 122 | "Invalid alphabet embedded in model. (Data corruption?" 123 | } 124 | LibraryError::InvalidShape => "Invalid model shape.", 125 | LibraryError::InvalidScorer => "Invalid scorer file.", 126 | LibraryError::ModelIncompatible => "Incompatible model.", 127 | LibraryError::ScorerNotEnabled => "External scorer is not enabled.", 128 | LibraryError::ScorerUnreadable => "Could not read scorer file.", 129 | LibraryError::ScorerInvalidLm => "Could not recognize language model header in scorer.", 130 | LibraryError::ScorerNoTrie => { 131 | "Reached end of scorer file before loading vocabulary trie." 132 | } 133 | LibraryError::ScorerInvalidTrie => "Invalid magic in trie header.", 134 | LibraryError::ScorerVersionMismatch => { 135 | "Scorer file version does not match expected version." 
136 | } 137 | LibraryError::FailInitMmap => "Failed to initialize memory mapped model.", 138 | LibraryError::FailInitSess => "Failed to initialize the session.", 139 | LibraryError::FailInterpreter => "Interpreter failed.", 140 | LibraryError::FailRunSess => "Failed to run the session.", 141 | LibraryError::FailCreateStream => "Error creating the stream.", 142 | LibraryError::FailReadProtobuf => "Error reading the proto buffer model file.", 143 | LibraryError::FailCreateSess => "Failed to create session.", 144 | LibraryError::FailCreateModel => "Could not allocate model state.", 145 | }; 146 | f.write_str("DeepspeechError: ")?; 147 | f.write_str(msg) 148 | } 149 | } 150 | 151 | impl Error for LibraryError {} 152 | 153 | #[derive(Debug)] 154 | #[non_exhaustive] 155 | pub enum DeepspeechError { 156 | 157 | /// A library call returned an error code. 158 | LibraryError(LibraryError), 159 | 160 | /// A library call returned an error flag that cannot be matched to an error code. 161 | UnknownLibraryError(u32), 162 | 163 | /// The library returned a string that could not be parsed as UTF8. 164 | ParseError(Utf8Error), 165 | 166 | #[cfg(feature = "dynamic")] 167 | /// `libloading` returned an error attempting to use the dynamic library. 168 | /// 169 | /// Only available with the `dynamic` feature. 
170 | DynamicError(libloading::Error), 171 | } 172 | 173 | impl Display for DeepspeechError { 174 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 175 | match self { 176 | Self::LibraryError(inner) => Display::fmt(inner, f), 177 | Self::ParseError(inner) => Display::fmt(inner, f), 178 | Self::UnknownLibraryError(inner) => write!(f, "Unknown library error code: {}", inner), 179 | #[cfg(feature = "dynamic")] 180 | Self::DynamicError(inner) => Display::fmt(inner, f), 181 | } 182 | } 183 | } 184 | 185 | impl Error for DeepspeechError { 186 | fn source(&self) -> Option<&(dyn Error + 'static)> { 187 | match self { 188 | Self::LibraryError(inner) => Some(inner), 189 | Self::ParseError(inner) => Some(inner), 190 | Self::UnknownLibraryError(_) => None, 191 | #[cfg(feature = "dynamic")] 192 | Self::DynamicError(inner) => Some(inner), 193 | } 194 | } 195 | } 196 | 197 | impl From for DeepspeechError { 198 | fn from(inner: Utf8Error) -> Self { 199 | Self::ParseError(inner) 200 | } 201 | } 202 | impl From for DeepspeechError { 203 | fn from(inner: std::string::FromUtf8Error) -> Self { 204 | inner.utf8_error().into() 205 | } 206 | } 207 | 208 | impl From for DeepspeechError { 209 | fn from(code: u32) -> Self { 210 | match LibraryError::from_code(code) { 211 | Some(err) => Self::LibraryError(err), 212 | None => Self::UnknownLibraryError(code), 213 | } 214 | } 215 | } 216 | impl From for DeepspeechError { 217 | fn from(code: i32) -> Self { 218 | let code = code.abs(); 219 | code.into() 220 | } 221 | } 222 | 223 | #[cfg(feature = "dynamic")] 224 | impl From for DeepspeechError { 225 | fn from(inner: libloading::Error) -> Self { 226 | Self::DynamicError(inner) 227 | } 228 | } 229 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 
/// Copies `p` into a freshly allocated, NUL-terminated byte buffer suitable
/// for passing to C as a `const char*`.
///
/// On Unix the raw bytes of the path are used, so paths that are not valid
/// UTF-8 are accepted. On other platforms the path must be valid UTF-8.
///
/// The bytes are copied verbatim; an interior NUL byte is not detected.
///
/// # Panics
///
/// On non-Unix platforms, panics if `p` is not valid UTF-8.
fn path_to_buf(p: &std::path::Path) -> Vec<u8> {
	#[cfg(unix)]
	let bytes: &[u8] = {
		use std::os::unix::ffi::OsStrExt;
		p.as_os_str().as_bytes()
	};
	#[cfg(not(unix))]
	let bytes: &[u8] = p
		.to_str()
		.expect("path must be valid UTF-8 on this platform")
		.as_bytes();

	// Reserve room for the terminator up front so `push` never reallocates.
	let mut v = Vec::with_capacity(bytes.len() + 1);
	v.extend_from_slice(bytes);
	v.push(0);
	v
}
// Expands to the `Model` type. The invoking module must provide `Library`,
// `ds`, `do_call!`, `do_call_with_res!`, and define `Stream` and `Metadata`.
macro_rules! impl_model {
	() => {
		/// A native `ModelState` pointer together with the library handle
		/// used to call into `libdeepspeech`.
		pub struct Model {
			library: Library,
			model: *mut ds::ModelState,
		}

		impl Drop for Model {
			/// Hands the model pointer to `DS_FreeModel`.
			fn drop(&mut self) {
				unsafe {
					do_call!(&self.library, DS_FreeModel, self.model);
				}
			}
		}

		impl Model {
			/// Set hyperparameters alpha and beta of the external scorer
			pub fn set_scorer_alpha_beta(
				&mut self,
				alpha: f32,
				beta: f32,
			) -> Result<(), DeepspeechError> {
				let status = unsafe {
					do_call_with_res!(
						&self.library,
						DS_SetScorerAlphaBeta,
						self.model,
						alpha,
						beta
					)
				};
				match status {
					0 => Ok(()),
					failure => Err(failure.into()),
				}
			}

			/// Enable decoding using an external scorer
			pub fn enable_external_scorer(
				&mut self,
				scorer_path: &Path,
			) -> Result<(), DeepspeechError> {
				// NUL-terminated copy of the path; must stay alive across the call.
				let c_path = path_to_buf(scorer_path);
				let status = unsafe {
					do_call_with_res!(
						&self.library,
						DS_EnableExternalScorer,
						self.model,
						c_path.as_ptr() as _
					)
				};
				match status {
					0 => Ok(()),
					failure => Err(failure.into()),
				}
			}

			/// Get sample rate expected by a model
			pub fn get_sample_rate(&self) -> i32 {
				unsafe { do_call!(&self.library, DS_GetModelSampleRate, self.model) as _ }
			}

			/// Get beam width value the model is currently configured to use
			pub fn get_model_beam_width(&self) -> u16 {
				// The native value is cast down to `u16` by `as _`.
				unsafe { do_call!(&self.library, DS_GetModelBeamWidth, self.model) as _ }
			}

			/// Set beam width value used by the model
			pub fn set_model_beam_width(&mut self, bw: u16) -> Result<(), DeepspeechError> {
				let status = unsafe {
					do_call_with_res!(&self.library, DS_SetModelBeamWidth, self.model, bw as _)
				};
				match status {
					0 => Ok(()),
					failure => Err(failure.into()),
				}
			}

			/// Disable decoding using an external scorer
			pub fn disable_external_scorer(&mut self) -> Result<(), DeepspeechError> {
				let status = unsafe {
					do_call_with_res!(&self.library, DS_DisableExternalScorer, self.model)
				};
				match status {
					0 => Ok(()),
					failure => Err(failure.into()),
				}
			}

			/// Perform speech-to-text using the model
			///
			/// The input buffer must consist of mono 16-bit samples.
			/// The sample rate is not freely chooseable but a property
			/// of the model files.
			///
			/// Returns `UnknownLibraryError(0)` when the library returns a null
			/// pointer, and `ParseError` when the text is not valid UTF-8.
			pub fn speech_to_text(&mut self, buffer: &[i16]) -> Result<String, DeepspeechError> {
				let bytes = unsafe {
					let raw = do_call!(
						&self.library,
						DS_SpeechToText,
						self.model,
						buffer.as_ptr(),
						buffer.len() as _
					);
					if raw.is_null() {
						return Err(DeepspeechError::UnknownLibraryError(0));
					}
					// Copy the text out before handing the native string back.
					let copied = CStr::from_ptr(raw).to_bytes().to_vec();
					do_call!(&self.library, DS_FreeString, raw);
					copied
				};
				String::from_utf8(bytes).map_err(|e| e.into())
			}

			/// Perform speech-to-text using the model, getting extended metadata
			///
			/// The input buffer must consist of mono 16-bit samples.
			/// The sample rate is not freely chooseable but a property
			/// of the model files.
			///
			/// The `num_transcripts` param contains the maximum number of
			/// `CandidateTranscript`s to return. The actually returned number
			/// might be smaller.
			pub fn speech_to_text_with_metadata(
				&mut self,
				buffer: &[i16],
				num_transcripts: u16,
			) -> Result<Metadata, DeepspeechError> {
				let raw = unsafe {
					do_call_with_res!(
						&self.library,
						DS_SpeechToTextWithMetadata,
						self.model,
						buffer.as_ptr(),
						buffer.len() as _,
						num_transcripts as _
					)
				};
				if raw.is_null() {
					Err(DeepspeechError::UnknownLibraryError(0))
				} else {
					Ok(Metadata {
						library: self.library.clone(),
						metadata: raw,
					})
				}
			}

			/// Set up a state for streaming inference
			pub fn create_stream(&mut self) -> Result<Stream, DeepspeechError> {
				// Out-parameter filled in by `DS_CreateStream`.
				let mut state = ptr::null_mut();
				let status = unsafe {
					do_call_with_res!(&self.library, DS_CreateStream, self.model, &mut state)
				};
				match status {
					0 => Ok(Stream {
						library: self.library.clone(),
						stream: state,
					}),
					failure => Err(failure.into()),
				}
			}
		}
	};
}
// Expands to the `Stream` type. The invoking module must provide `Library`,
// `ds`, `do_call!`, `do_call_with_res!`, and define `Metadata`.
macro_rules! impl_stream {
	() => {
		/// A native `StreamingState` pointer together with the library handle
		/// used to call into `libdeepspeech`.
		pub struct Stream {
			library: Library,
			stream: *mut ds::StreamingState,
		}

		impl Stream {
			/// Feed audio samples to the stream
			///
			/// The input buffer must consist of mono 16-bit samples.
			pub fn feed_audio(&mut self, buffer: &[i16]) {
				unsafe {
					do_call!(
						&self.library,
						DS_FeedAudioContent,
						self.stream,
						buffer.as_ptr(),
						buffer.len() as _
					);
				}
			}

			/// Decodes the intermediate state of what has been spoken up until now
			///
			/// Note that as of DeepSpeech version 0.2.0,
			/// this function is non-trivial as the decoder can't do streaming yet.
			pub fn intermediate_decode(&mut self) -> Result<String, DeepspeechError> {
				let r = unsafe {
					let ptr = do_call!(&self.library, DS_IntermediateDecode, self.stream);
					if ptr.is_null() {
						return Err(DeepspeechError::UnknownLibraryError(0));
					}
					// Copy the text out before handing the native string back.
					let v = CStr::from_ptr(ptr).to_bytes().to_vec();
					do_call!(&self.library, DS_FreeString, ptr);
					v
				};
				String::from_utf8(r).map_err(|e| e.into())
			}

			/// Deallocates the stream and returns the decoded text
			///
			/// Returns `UnknownLibraryError(0)` when the library returns a null
			/// pointer, and `ParseError` when the text is not valid UTF-8.
			pub fn finish(mut self) -> Result<String, DeepspeechError> {
				let decoded = unsafe {
					let ptr = do_call!(&self.library, DS_FinishStream, self.stream);
					if ptr.is_null() {
						None
					} else {
						let v = CStr::from_ptr(ptr).to_bytes().to_vec();
						do_call!(&self.library, DS_FreeString, ptr);
						Some(v)
					}
				};
				// Release only the library handle by hand ...
				unsafe {
					std::ptr::drop_in_place(&mut self.library);
				}
				// ... and don't run the destructor for self, as DS_FinishStream
				// already does it for us. This must also happen when the result
				// was null, otherwise `Drop` would free the stream a second time.
				forget(self);
				match decoded {
					Some(bytes) => String::from_utf8(bytes).map_err(|e| e.into()),
					None => Err(DeepspeechError::UnknownLibraryError(0)),
				}
			}

			/// Deallocates the stream and returns the extended metadata
			///
			/// The `num_transcripts` param contains the maximum number of
			/// `CandidateTranscript`s to return. The actually returned number
			/// might be smaller.
			pub fn finish_with_metadata(
				mut self,
				num_transcripts: u32,
			) -> Result<Metadata, DeepspeechError> {
				// If `do_call_with_res!` bails out here, the native function was
				// never called and `Drop` still has to free the stream.
				let ptr = unsafe {
					do_call_with_res!(
						&self.library,
						DS_FinishStreamWithMetadata,
						self.stream,
						num_transcripts as _
					)
				};
				let library = self.library.clone();
				unsafe {
					std::ptr::drop_in_place(&mut self.library);
				}
				// Don't run the destructor for self, as the finish call already
				// does it for us. Done before the null check so that a null
				// result cannot lead to a second free through `Drop`.
				forget(self);
				if ptr.is_null() {
					return Err(DeepspeechError::UnknownLibraryError(0));
				}
				Ok(Metadata {
					library,
					metadata: ptr,
				})
			}
		}

		impl Drop for Stream {
			/// Hands the stream pointer to `DS_FreeStream`.
			fn drop(&mut self) {
				unsafe {
					do_call!(&self.library, DS_FreeStream, self.stream);
				}
			}
		}
	};
}
// Expands to the `Metadata` type. The invoking module must provide `Library`,
// `ds`, `do_call!`, and define `CandidateTranscript`.
macro_rules! impl_metadata {
	() => {
		/// A native `Metadata` pointer together with the library handle used
		/// to free it.
		pub struct Metadata {
			#[allow(dead_code)]
			library: Library,
			metadata: *mut ds::Metadata,
		}

		impl Drop for Metadata {
			/// Hands the metadata pointer to `DS_FreeMetadata`.
			fn drop(&mut self) {
				unsafe {
					do_call!(&self.library, DS_FreeMetadata, self.metadata);
				}
			}
		}

		impl Metadata {
			/// The candidate transcripts stored in the native structure.
			///
			/// Returns an empty slice when the native array pointer is null or
			/// the reported count is zero.
			pub fn transcripts(&self) -> &[CandidateTranscript] {
				let len = self.num_transcripts() as usize;
				unsafe {
					let ptr = (*self.metadata).transcripts as *const CandidateTranscript;
					// `from_raw_parts` requires a non-null pointer even for
					// zero-length slices.
					if ptr.is_null() || len == 0 {
						return &[];
					}
					slice::from_raw_parts(ptr, len)
				}
			}

			/// The `num_transcripts` field of the native structure.
			pub fn num_transcripts(&self) -> u32 {
				unsafe { (*self.metadata).num_transcripts }
			}
		}
	};
}
// Expands to the `TokenMetadata` type. The invoking module must provide `ds`
// and `CStr`.
macro_rules! impl_token_metadata {
	() => {
		/// Transparent wrapper around the native `TokenMetadata` record.
		#[repr(transparent)]
		pub struct TokenMetadata {
			metadata_item: ds::TokenMetadata,
		}

		impl TokenMetadata {
			/// The text of the token generated for transcription
			///
			/// Fails when the native string is not valid UTF-8.
			pub fn text(&self) -> Result<&str, std::str::Utf8Error> {
				unsafe {
					let slice = CStr::from_ptr(self.metadata_item.text);
					slice.to_str()
				}
			}

			/// Position of the token in units of 20ms
			pub fn timestep(&self) -> u32 {
				self.metadata_item.timestep
			}

			/// Position of the token in seconds
			pub fn start_time(&self) -> f32 {
				self.metadata_item.start_time
			}
		}
	};
}

// Expands to the `CandidateTranscript` type. The invoking module must provide
// `ds`, `fmt`, `slice`, and define `TokenMetadata`.
macro_rules! impl_candidate_transcript {
	() => {
		/// Transparent wrapper around the native `CandidateTranscript` record.
		#[repr(transparent)]
		pub struct CandidateTranscript {
			transcript_item: ds::CandidateTranscript,
		}

		impl CandidateTranscript {
			/// The tokens that make up this transcript.
			///
			/// Returns an empty slice when the native array pointer is null or
			/// the reported count is zero.
			pub fn tokens(&self) -> &[TokenMetadata] {
				let len = self.num_tokens() as usize;
				let ptr = self.transcript_item.tokens as *const TokenMetadata;
				// `from_raw_parts` requires a non-null pointer even for
				// zero-length slices.
				if ptr.is_null() || len == 0 {
					return &[];
				}
				unsafe { slice::from_raw_parts(ptr, len) }
			}

			/// The `confidence` field of the native record.
			pub fn confidence(&self) -> f64 {
				self.transcript_item.confidence
			}

			/// The `num_tokens` field of the native record.
			pub fn num_tokens(&self) -> u32 {
				self.transcript_item.num_tokens
			}
		}

		/// Writes the text of all tokens back to back.
		///
		/// A token whose text is not valid UTF-8 is rendered as U+FFFD
		/// instead of panicking.
		impl fmt::Display for CandidateTranscript {
			fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				for token in self.tokens() {
					match token.text() {
						Ok(text) => f.write_str(text)?,
						Err(_) => f.write_str("\u{FFFD}")?,
					}
				}
				Ok(())
			}
		}
	};
}
// Expands to `deepspeech_version`. Invoked without arguments the function
// takes no parameters; invoked as `(name, Type)` it takes one parameter
// `name: Type`, which is passed to `do_call!` as the library handle.
macro_rules! impl_deepspeech_version {
	($($libname:ident, $library:path)?) => {
		/// Returns the string reported by `DS_Version`.
		///
		/// Returns `UnknownLibraryError(0)` when the library returns a null
		/// pointer, and `ParseError` when the text is not valid UTF-8.
		pub fn deepspeech_version($($libname : $library),*) -> Result<String, DeepspeechError> {
			let r = unsafe {
				let ptr = do_call!( (( $($libname)* )), DS_Version, );
				// Same guard as the other string-returning wrappers:
				// `CStr::from_ptr` must never see a null pointer.
				if ptr.is_null() {
					return Err(DeepspeechError::UnknownLibraryError(0));
				}
				let v = CStr::from_ptr(ptr).to_bytes().to_vec();
				do_call!((($($libname)*)), DS_FreeString, ptr);
				v
			};
			String::from_utf8(r).map_err(|e| e.into())
		}
	};
}

#[cfg(feature = "static_bindings")]
pub use self::static_bindings::*;
#[cfg(feature = "static_bindings")]
mod static_bindings {

	use deepspeech_sys as ds;
	use std::ffi::CStr;
	use std::fmt;
	use std::mem::forget;
	use std::ops::Drop;
	use std::path::Path;
	use std::ptr;
	use std::slice;

	use super::{path_to_buf, DeepspeechError};

	// There is no library handle in this configuration.
	type Library = ();

	// Calls `ds::$f` directly; the `$this` argument is accepted and ignored.
	macro_rules! do_call {
		($this:expr, $f:ident, $($params:expr),*) => {{
			ds::$f($($params),*)
		}}
	}
	// Identical to `do_call!` here: a direct call has no lookup step that could fail.
	macro_rules! do_call_with_res {
		($this:expr, $f:ident, $($params:expr),*) => {{
			ds::$f($($params),*)
		}}

	}
	impl_model!();
	impl_metadata!();
	impl_stream!();
	impl_token_metadata!();
	impl_candidate_transcript!();
	impl_deepspeech_version!();

	impl Model {
		/// Load a DeepSpeech model from the specified model file path
		pub fn load_from_files(model_path: &Path) -> Result<Model, DeepspeechError> {
			let mp = path_to_buf(model_path);
			let mut model = ptr::null_mut();
			let ret = unsafe { ds::DS_CreateModel(mp.as_ptr() as _, &mut model) };
			if ret != 0 {
				return Err(ret.into());
			}
			Ok(Model { library: (), model })
		}
	}
}
#[cfg(feature = "dynamic")]
/// Wrappers that allow for using an arbitrary `libdeepspeech.so` file at runtime via the `libloading` crate.
pub mod dynamic {
	use super::{path_to_buf, DeepspeechError};
	use crate::dynamic_bindings as ds;
	pub use crate::dynamic_bindings::LibraryWrapper as Library;
	use std::ffi::CStr;
	use std::fmt;
	use std::mem::forget;
	use std::ops::Drop;
	use std::path::Path;
	use std::ptr;
	use std::slice;

	// Calls `$f` on the library handle and panics (`unwrap`) if the wrapper
	// reports an error.
	macro_rules! do_call {
		($this:expr, $f:ident, $($params:expr),*) => {{
			ds::LibraryWrapper::$f(&$this, $($params),*).unwrap()
		}}
	}
	// Calls `$f` on the library handle and propagates a wrapper error with `?`.
	macro_rules! do_call_with_res {
		($this:expr, $f:ident, $($params:expr),*) => {{
			ds::LibraryWrapper::$f(&$this, $($params),*)?
		}}

	}

	impl_model!();
	impl_stream!();
	impl_metadata!();
	impl_token_metadata!();
	impl_candidate_transcript!();
	impl_deepspeech_version!(library, Library);

	impl Model {
		/// Returns a clone of the library handle held by this model.
		pub fn library(&self) -> Library {
			self.library.clone()
		}

		/// Load a DeepSpeech model from the specified shared library and model file paths
		pub fn load_from_files(
			library_path: &Path,
			model_path: &Path,
		) -> Result<Model, DeepspeechError> {
			let library = ds::LibraryWrapper::from_path(library_path)?;
			// NUL-terminated copy of the path; must stay alive across the call.
			let c_model_path = path_to_buf(model_path);
			// Out-parameter filled in by `DS_CreateModel`.
			let mut model = ptr::null_mut();
			let status = unsafe {
				do_call_with_res!(&library, DS_CreateModel, c_model_path.as_ptr() as _, &mut model)
			};
			match status {
				0 => Ok(Model { library, model }),
				failure => Err(failure.into()),
			}
		}
	}
}
authors = ["est31 "] 9 | 10 | include = ["deepspeech/native_client/deepspeech.h", "/src/**", 11 | "/build.rs", "/LICENSE", "Cargo.toml"] 12 | 13 | [badges] 14 | maintenance = { status = "deprecated" } 15 | -------------------------------------------------------------------------------- /sys/LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. 
"Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. 
Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. 
Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. 
Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. 
Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. 
However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. 
Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. 
This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. 
Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 
374 | -------------------------------------------------------------------------------- /sys/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rustc-link-lib=deepspeech"); 3 | } 4 | -------------------------------------------------------------------------------- /sys/src/bindings.rs: -------------------------------------------------------------------------------- 1 | pub type size_t = :: std :: os :: raw :: c_ulong ; 2 | # [repr (C)] # [repr (align (16))] # [derive (Debug , Copy , Clone)] pub struct max_align_t { 3 | pub __clang_max_align_nonce1 : :: std :: os :: raw :: c_longlong , pub __bindgen_padding_0 : u64 , pub __clang_max_align_nonce2 : u128 , 4 | } 5 | # [test] fn bindgen_test_layout_max_align_t () { 6 | assert_eq ! (:: std :: mem :: size_of ::< max_align_t > () , 32usize , concat ! ("Size of: " , stringify ! (max_align_t))) ; 7 | assert_eq ! (:: std :: mem :: align_of ::< max_align_t > () , 16usize , concat ! ("Alignment of " , stringify ! (max_align_t))) ; 8 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< max_align_t > ())) . __clang_max_align_nonce1 as * const _ as usize } , 0usize , concat ! ("Offset of field: " , stringify ! (max_align_t) , "::" , stringify ! (__clang_max_align_nonce1))) ; 9 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< max_align_t > ())) . __clang_max_align_nonce2 as * const _ as usize } , 16usize , concat ! ("Offset of field: " , stringify ! (max_align_t) , "::" , stringify ! 
(__clang_max_align_nonce2))) ; 10 | 11 | } 12 | # [repr (C)] # [derive (Debug , Copy , Clone)] pub struct ModelState { 13 | _unused : [u8 ; 0] , 14 | } 15 | # [repr (C)] # [derive (Debug , Copy , Clone)] pub struct StreamingState { 16 | _unused : [u8 ; 0] , 17 | } 18 | # [doc = " @brief Stores text of an individual token, along with its timing information"] # [repr (C)] # [derive (Debug , Copy , Clone)] pub struct TokenMetadata { 19 | # [doc = " The text corresponding to this token"] pub text : * const :: std :: os :: raw :: c_char , # [doc = " Position of the token in units of 20ms"] pub timestep : :: std :: os :: raw :: c_uint , # [doc = " Position of the token in seconds"] pub start_time : f32 , 20 | } 21 | # [test] fn bindgen_test_layout_TokenMetadata () { 22 | assert_eq ! (:: std :: mem :: size_of ::< TokenMetadata > () , 16usize , concat ! ("Size of: " , stringify ! (TokenMetadata))) ; 23 | assert_eq ! (:: std :: mem :: align_of ::< TokenMetadata > () , 8usize , concat ! ("Alignment of " , stringify ! (TokenMetadata))) ; 24 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< TokenMetadata > ())) . text as * const _ as usize } , 0usize , concat ! ("Offset of field: " , stringify ! (TokenMetadata) , "::" , stringify ! (text))) ; 25 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< TokenMetadata > ())) . timestep as * const _ as usize } , 8usize , concat ! ("Offset of field: " , stringify ! (TokenMetadata) , "::" , stringify ! (timestep))) ; 26 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< TokenMetadata > ())) . start_time as * const _ as usize } , 12usize , concat ! ("Offset of field: " , stringify ! (TokenMetadata) , "::" , stringify ! 
(start_time))) ; 27 | 28 | } 29 | # [doc = " @brief A single transcript computed by the model, including a confidence"] # [doc = " value and the metadata for its constituent tokens."] # [repr (C)] # [derive (Debug , Copy , Clone)] pub struct CandidateTranscript { 30 | # [doc = " Array of TokenMetadata objects"] pub tokens : * const TokenMetadata , # [doc = " Size of the tokens array"] pub num_tokens : :: std :: os :: raw :: c_uint , # [doc = " Approximated confidence value for this transcript. This is roughly the"] # [doc = " sum of the acoustic model logit values for each timestep/character that"] # [doc = " contributed to the creation of this transcript."] pub confidence : f64 , 31 | } 32 | # [test] fn bindgen_test_layout_CandidateTranscript () { 33 | assert_eq ! (:: std :: mem :: size_of ::< CandidateTranscript > () , 24usize , concat ! ("Size of: " , stringify ! (CandidateTranscript))) ; 34 | assert_eq ! (:: std :: mem :: align_of ::< CandidateTranscript > () , 8usize , concat ! ("Alignment of " , stringify ! (CandidateTranscript))) ; 35 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< CandidateTranscript > ())) . tokens as * const _ as usize } , 0usize , concat ! ("Offset of field: " , stringify ! (CandidateTranscript) , "::" , stringify ! (tokens))) ; 36 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< CandidateTranscript > ())) . num_tokens as * const _ as usize } , 8usize , concat ! ("Offset of field: " , stringify ! (CandidateTranscript) , "::" , stringify ! (num_tokens))) ; 37 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< CandidateTranscript > ())) . confidence as * const _ as usize } , 16usize , concat ! ("Offset of field: " , stringify ! (CandidateTranscript) , "::" , stringify ! 
(confidence))) ; 38 | 39 | } 40 | # [doc = " @brief An array of CandidateTranscript objects computed by the model."] # [repr (C)] # [derive (Debug , Copy , Clone)] pub struct Metadata { 41 | # [doc = " Array of CandidateTranscript objects"] pub transcripts : * const CandidateTranscript , # [doc = " Size of the transcripts array"] pub num_transcripts : :: std :: os :: raw :: c_uint , 42 | } 43 | # [test] fn bindgen_test_layout_Metadata () { 44 | assert_eq ! (:: std :: mem :: size_of ::< Metadata > () , 16usize , concat ! ("Size of: " , stringify ! (Metadata))) ; 45 | assert_eq ! (:: std :: mem :: align_of ::< Metadata > () , 8usize , concat ! ("Alignment of " , stringify ! (Metadata))) ; 46 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< Metadata > ())) . transcripts as * const _ as usize } , 0usize , concat ! ("Offset of field: " , stringify ! (Metadata) , "::" , stringify ! (transcripts))) ; 47 | assert_eq ! (unsafe { & (* (:: std :: ptr :: null ::< Metadata > ())) . num_transcripts as * const _ as usize } , 8usize , concat ! ("Offset of field: " , stringify ! (Metadata) , "::" , stringify ! 
(num_transcripts))) ; 48 | 49 | } 50 | pub const DeepSpeech_Error_Codes_DS_ERR_OK : DeepSpeech_Error_Codes = 0 ; 51 | pub const DeepSpeech_Error_Codes_DS_ERR_NO_MODEL : DeepSpeech_Error_Codes = 4096 ; 52 | pub const DeepSpeech_Error_Codes_DS_ERR_INVALID_ALPHABET : DeepSpeech_Error_Codes = 8192 ; 53 | pub const DeepSpeech_Error_Codes_DS_ERR_INVALID_SHAPE : DeepSpeech_Error_Codes = 8193 ; 54 | pub const DeepSpeech_Error_Codes_DS_ERR_INVALID_SCORER : DeepSpeech_Error_Codes = 8194 ; 55 | pub const DeepSpeech_Error_Codes_DS_ERR_MODEL_INCOMPATIBLE : DeepSpeech_Error_Codes = 8195 ; 56 | pub const DeepSpeech_Error_Codes_DS_ERR_SCORER_NOT_ENABLED : DeepSpeech_Error_Codes = 8196 ; 57 | pub const DeepSpeech_Error_Codes_DS_ERR_SCORER_UNREADABLE : DeepSpeech_Error_Codes = 8197 ; 58 | pub const DeepSpeech_Error_Codes_DS_ERR_SCORER_INVALID_LM : DeepSpeech_Error_Codes = 8198 ; 59 | pub const DeepSpeech_Error_Codes_DS_ERR_SCORER_NO_TRIE : DeepSpeech_Error_Codes = 8199 ; 60 | pub const DeepSpeech_Error_Codes_DS_ERR_SCORER_INVALID_TRIE : DeepSpeech_Error_Codes = 8200 ; 61 | pub const DeepSpeech_Error_Codes_DS_ERR_SCORER_VERSION_MISMATCH : DeepSpeech_Error_Codes = 8201 ; 62 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_INIT_MMAP : DeepSpeech_Error_Codes = 12288 ; 63 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_INIT_SESS : DeepSpeech_Error_Codes = 12289 ; 64 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_INTERPRETER : DeepSpeech_Error_Codes = 12290 ; 65 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_RUN_SESS : DeepSpeech_Error_Codes = 12291 ; 66 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_STREAM : DeepSpeech_Error_Codes = 12292 ; 67 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_READ_PROTOBUF : DeepSpeech_Error_Codes = 12293 ; 68 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_SESS : DeepSpeech_Error_Codes = 12294 ; 69 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_CREATE_MODEL : DeepSpeech_Error_Codes = 12295 ; 70 | pub const 
DeepSpeech_Error_Codes_DS_ERR_FAIL_INSERT_HOTWORD : DeepSpeech_Error_Codes = 12296 ; 71 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_CLEAR_HOTWORD : DeepSpeech_Error_Codes = 12297 ; 72 | pub const DeepSpeech_Error_Codes_DS_ERR_FAIL_ERASE_HOTWORD : DeepSpeech_Error_Codes = 12304 ; 73 | pub type DeepSpeech_Error_Codes = :: std :: os :: raw :: c_uint ; 74 | extern "C" { 75 | # [doc = " @brief Returns a textual description corresponding to an error code."] # [doc = " The string returned must be freed with {@link DS_FreeString()}."] # [doc = ""] # [doc = " @return The error description."] pub fn DS_ErrorCodeToErrorMessage (aErrorCode : :: std :: os :: raw :: c_int ,) -> * mut :: std :: os :: raw :: c_char ; 76 | # [doc = " @brief Returns the version of this library. The returned version is a semantic"] # [doc = " version (SemVer 2.0.0). The string returned must be freed with {@link DS_FreeString()}."] # [doc = ""] # [doc = " @return The version string."] pub fn DS_Version () -> * mut :: std :: os :: raw :: c_char ; 77 | # [doc = " @brief Free a char* string returned by the DeepSpeech API."] pub fn DS_FreeString (str_ : * mut :: std :: os :: raw :: c_char) ; 78 | # [doc = " @brief Free memory allocated for metadata information."] pub fn DS_FreeMetadata (m : * mut Metadata) ; 79 | # [doc = " @brief Destroy a streaming state without decoding the computed logits. This"] # [doc = " can be used if you no longer need the result of an ongoing streaming"] # [doc = " inference and don't want to perform a costly decode operation."] # [doc = ""] # [doc = " @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}."] # [doc = ""] # [doc = " @note This method will free the state pointer (@p aSctx)."] pub fn DS_FreeStream (aSctx : * mut StreamingState) ; 80 | # [doc = " @brief Compute the final decoding of an ongoing streaming inference and return"] # [doc = " results including metadata.
Signals the end of an ongoing streaming"] # [doc = " inference."] # [doc = ""] # [doc = " @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}."] # [doc = " @param aNumResults The number of candidate transcripts to return."] # [doc = ""] # [doc = " @return Metadata struct containing multiple candidate transcripts. Each transcript"] # [doc = " has per-token metadata including timing information. The user is"] # [doc = " responsible for freeing Metadata by calling {@link DS_FreeMetadata()}."] # [doc = " Returns NULL on error."] # [doc = ""] # [doc = " @note This method will free the state pointer (@p aSctx)."] pub fn DS_FinishStreamWithMetadata (aSctx : * mut StreamingState , aNumResults : :: std :: os :: raw :: c_uint ,) -> * mut Metadata ; 81 | # [doc = " @brief Compute the final decoding of an ongoing streaming inference and return"] # [doc = " the result. Signals the end of an ongoing streaming inference."] # [doc = ""] # [doc = " @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}."] # [doc = ""] # [doc = " @return The STT result. The user is responsible for freeing the string using"] # [doc = " {@link DS_FreeString()}."] # [doc = ""] # [doc = " @note This method will free the state pointer (@p aSctx)."] pub fn DS_FinishStream (aSctx : * mut StreamingState) -> * mut :: std :: os :: raw :: c_char ; 82 | # [doc = " @brief Compute the intermediate decoding of an ongoing streaming inference,"] # [doc = " return results including metadata."] # [doc = ""] # [doc = " @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}."] # [doc = " @param aNumResults The number of candidate transcripts to return."] # [doc = ""] # [doc = " @return Metadata struct containing multiple candidate transcripts. Each transcript"] # [doc = " has per-token metadata including timing information. 
The user is"] # [doc = " responsible for freeing Metadata by calling {@link DS_FreeMetadata()}."] # [doc = " Returns NULL on error."] pub fn DS_IntermediateDecodeWithMetadata (aSctx : * const StreamingState , aNumResults : :: std :: os :: raw :: c_uint ,) -> * mut Metadata ; 83 | # [doc = " @brief Compute the intermediate decoding of an ongoing streaming inference."] # [doc = ""] # [doc = " @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}."] # [doc = ""] # [doc = " @return The STT intermediate result. The user is responsible for freeing the"] # [doc = " string using {@link DS_FreeString()}."] pub fn DS_IntermediateDecode (aSctx : * const StreamingState) -> * mut :: std :: os :: raw :: c_char ; 84 | # [doc = " @brief Feed audio samples to an ongoing streaming inference."] # [doc = ""] # [doc = " @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}."] # [doc = " @param aBuffer An array of 16-bit, mono raw audio samples at the"] # [doc = " appropriate sample rate (matching what the model was trained on)."] # [doc = " @param aBufferSize The number of samples in @p aBuffer."] pub fn DS_FeedAudioContent (aSctx : * mut StreamingState , aBuffer : * const :: std :: os :: raw :: c_short , aBufferSize : :: std :: os :: raw :: c_uint ,) ; 85 | # [doc = " @brief Create a new streaming inference state. The streaming state returned"] # [doc = " by this function can then be passed to {@link DS_FeedAudioContent()}"] # [doc = " and {@link DS_FinishStream()}."] # [doc = ""] # [doc = " @param aCtx The ModelState pointer for the model to use."] # [doc = " @param[out] retval an opaque pointer that represents the streaming state. 
Can"] # [doc = " be NULL if an error occurs."] # [doc = ""] # [doc = " @return Zero for success, non-zero on failure."] pub fn DS_CreateStream (aCtx : * mut ModelState , retval : * mut * mut StreamingState ,) -> :: std :: os :: raw :: c_int ; 86 | # [doc = " @brief Use the DeepSpeech model to convert speech to text and output results"] # [doc = " including metadata."] # [doc = ""] # [doc = " @param aCtx The ModelState pointer for the model to use."] # [doc = " @param aBuffer A 16-bit, mono raw audio signal at the appropriate"] # [doc = " sample rate (matching what the model was trained on)."] # [doc = " @param aBufferSize The number of samples in the audio signal."] # [doc = " @param aNumResults The maximum number of CandidateTranscript structs to return. Returned value might be smaller than this."] # [doc = ""] # [doc = " @return Metadata struct containing multiple CandidateTranscript structs. Each"] # [doc = " transcript has per-token metadata including timing information. The"] # [doc = " user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}."] # [doc = " Returns NULL on error."] pub fn DS_SpeechToTextWithMetadata (aCtx : * mut ModelState , aBuffer : * const :: std :: os :: raw :: c_short , aBufferSize : :: std :: os :: raw :: c_uint , aNumResults : :: std :: os :: raw :: c_uint ,) -> * mut Metadata ; 87 | # [doc = " @brief Use the DeepSpeech model to convert speech to text."] # [doc = ""] # [doc = " @param aCtx The ModelState pointer for the model to use."] # [doc = " @param aBuffer A 16-bit, mono raw audio signal at the appropriate"] # [doc = " sample rate (matching what the model was trained on)."] # [doc = " @param aBufferSize The number of samples in the audio signal."] # [doc = ""] # [doc = " @return The STT result. The user is responsible for freeing the string using"] # [doc = " {@link DS_FreeString()}. 
Returns NULL on error."] pub fn DS_SpeechToText (aCtx : * mut ModelState , aBuffer : * const :: std :: os :: raw :: c_short , aBufferSize : :: std :: os :: raw :: c_uint ,) -> * mut :: std :: os :: raw :: c_char ; 88 | # [doc = " @brief Set hyperparameters alpha and beta of the external scorer."] # [doc = ""] # [doc = " @param aCtx The ModelState pointer for the model being changed."] # [doc = " @param aAlpha The alpha hyperparameter of the decoder. Language model weight."] # [doc = " @param aBeta The beta hyperparameter of the decoder. Word insertion weight."] # [doc = ""] # [doc = " @return Zero on success, non-zero on failure."] pub fn DS_SetScorerAlphaBeta (aCtx : * mut ModelState , aAlpha : f32 , aBeta : f32 ,) -> :: std :: os :: raw :: c_int ; 89 | # [doc = " @brief Disable decoding using an external scorer."] # [doc = ""] # [doc = " @param aCtx The ModelState pointer for the model being changed."] # [doc = ""] # [doc = " @return Zero on success, non-zero on failure."] pub fn DS_DisableExternalScorer (aCtx : * mut ModelState) -> :: std :: os :: raw :: c_int ; 90 | # [doc = " @brief Removes all elements from the hot-words map."] # [doc = ""] # [doc = " @param aCtx The ModelState pointer for the model being changed."] # [doc = ""] # [doc = " @return Zero on success, non-zero on failure (invalid arguments)."] pub fn DS_ClearHotWords (aCtx : * mut ModelState) -> :: std :: os :: raw :: c_int ; 91 | # [doc = " @brief Remove entry for a hot-word from the hot-words map."] # [doc = ""] # [doc = " @param aCtx The ModelState pointer for the model being changed."] # [doc = " @param word The hot-word."] # [doc = ""] # [doc = " @return Zero on success, non-zero on failure (invalid arguments)."] pub fn DS_EraseHotWord (aCtx : * mut ModelState , word : * const :: std :: os :: raw :: c_char ,) -> :: std :: os :: raw :: c_int ; 92 | # [doc = " @brief Add a hot-word and its boost."] # [doc = ""] # [doc = " @param aCtx The ModelState pointer for the model being changed."] #
[doc = " @param word The hot-word."] # [doc = " @param boost The boost."] # [doc = ""] # [doc = " @return Zero on success, non-zero on failure (invalid arguments)."] pub fn DS_AddHotWord (aCtx : * mut ModelState , word : * const :: std :: os :: raw :: c_char , boost : f32 ,) -> :: std :: os :: raw :: c_int ; 93 | # [doc = " @brief Enable decoding using an external scorer."] # [doc = ""] # [doc = " @param aCtx The ModelState pointer for the model being changed."] # [doc = " @param aScorerPath The path to the external scorer file."] # [doc = ""] # [doc = " @return Zero on success, non-zero on failure (invalid arguments)."] pub fn DS_EnableExternalScorer (aCtx : * mut ModelState , aScorerPath : * const :: std :: os :: raw :: c_char ,) -> :: std :: os :: raw :: c_int ; 94 | # [doc = " @brief Frees associated resources and destroys model object."] pub fn DS_FreeModel (ctx : * mut ModelState) ; 95 | # [doc = " @brief Return the sample rate expected by a model."] # [doc = ""] # [doc = " @param aCtx A ModelState pointer created with {@link DS_CreateModel}."] # [doc = ""] # [doc = " @return Sample rate expected by the model for its input."] pub fn DS_GetModelSampleRate (aCtx : * const ModelState) -> :: std :: os :: raw :: c_int ; 96 | # [doc = " @brief Set beam width value used by the model."] # [doc = ""] # [doc = " @param aCtx A ModelState pointer created with {@link DS_CreateModel}."] # [doc = " @param aBeamWidth The beam width used by the model. A larger beam width value"] # [doc = " generates better results at the cost of decoding time."] # [doc = ""] # [doc = " @return Zero on success, non-zero on failure."] pub fn DS_SetModelBeamWidth (aCtx : * mut ModelState , aBeamWidth : :: std :: os :: raw :: c_uint ,) -> :: std :: os :: raw :: c_int ; 97 | # [doc = " @brief Get beam width value used by the model. 
If {@link DS_SetModelBeamWidth}"] # [doc = " was not called before, will return the default value loaded from the"] # [doc = " model file."] # [doc = ""] # [doc = " @param aCtx A ModelState pointer created with {@link DS_CreateModel}."] # [doc = ""] # [doc = " @return Beam width value used by the model."] pub fn DS_GetModelBeamWidth (aCtx : * const ModelState) -> :: std :: os :: raw :: c_uint ; 98 | # [doc = " @brief An object providing an interface to a trained DeepSpeech model."] # [doc = ""] # [doc = " @param aModelPath The path to the frozen model graph."] # [doc = " @param[out] retval a ModelState pointer"] # [doc = ""] # [doc = " @return Zero on success, non-zero on failure."] pub fn DS_CreateModel (aModelPath : * const :: std :: os :: raw :: c_char , retval : * mut * mut ModelState ,) -> :: std :: os :: raw :: c_int ; 99 | 100 | } 101 | -------------------------------------------------------------------------------- /sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_upper_case_globals)] 2 | #![allow(non_camel_case_types)] 3 | #![allow(non_snake_case)] 4 | #![allow(dead_code)] 5 | 6 | include!("./bindings.rs"); 7 | --------------------------------------------------------------------------------