├── .github └── pull_request_template.md ├── .gitignore ├── CODE_OF_CONDUCT.md ├── README.md ├── amharic └── itml │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── basque └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── bengali └── twb │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ ├── alphabet.txt │ └── corpus_wav-test.csv ├── breton └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── catalan └── ccoreilly │ └── v0.14.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── chatino └── bozden │ └── v1.0.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── chuvash └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── czech └── comodoro │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ ├── v0.2.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.3.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── dhivehi └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── dutch └── acabunoc │ └── v0.0.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── english └── coqui │ ├── v0.9.3 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ ├── v1.0.0-checkpoints │ ├── LICENSE │ ├── LOGS_TESTING_NO_LM.txt │ ├── MODEL_CARD.md │ └── alphabet.txt │ ├── v1.0.0-digits │ ├── LICENSE │ ├── MODEL_CARD.md │ └── digits.scorer │ ├── v1.0.0-huge-vocab │ ├── LOG_TESTING │ ├── MODEL_CARD.md │ └── alphabet.txt │ ├── v1.0.0-large-vocab │ ├── LICENSE │ ├── LOG_TESTING │ ├── MODEL_CARD.md │ └── alphabet.txt │ ├── v1.0.0-yesno │ ├── LICENSE │ ├── MODEL_CARD.md │ └── yesno.scorer │ └── yesno-v0.0.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── estonian 
└── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── finnish └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── french ├── commonvoice-fr │ ├── v0.6 │ │ ├── LICENSE │ │ ├── MODEL_CARD.md │ │ └── alphabet.txt │ ├── v0.8 │ │ ├── LICENSE │ │ ├── MODEL_CARD.md │ │ └── alphabet.txt │ └── v0.9 │ │ ├── LICENSE │ │ ├── MODEL_CARD.md │ │ └── alphabet.txt └── jaco-assistant │ └── v0.0.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── frisian └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── georgian └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── german ├── aashishag │ └── v0.9.0 │ │ ├── LICENSE │ │ ├── MODEL_CARD.md │ │ └── alphabet.txt ├── jaco-assistant │ └── v0.0.1 │ │ ├── LICENSE │ │ ├── MODEL_CARD.md │ │ └── alphabet.txt └── yoummday │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── greek └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── hakha-chin └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── hindi └── bozden │ └── v0.8.99 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── hungarian └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── indonesian └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── irish └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ 
├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── italian ├── jaco-assistant │ └── v0.0.1 │ │ ├── LICENSE │ │ ├── MODEL_CARD.md │ │ └── alphabet.txt └── mozillaitalia │ └── v2020.8.7 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── kinyarwanda └── digital-umuganda │ └── v0.0.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── komi └── itml │ └── v0.0.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── kyrgyz └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── latvian └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── lithuanian └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── luganda └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── maltese └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── mixtec └── jemeyer │ └── v1.0.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── mongolian └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── odia └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── persian └── oct4pie │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── polish └── jaco-assistant │ └── v0.0.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── portuguese └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── romanian └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md 
│ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── romansh-sursilvan └── itml │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── romansh-vallader └── itml │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── russian └── jemeyer │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── sakha └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── slovenian └── itml │ ├── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt │ └── v0.1.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── spanish └── jaco-assistant │ └── v0.0.1 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── swahili-congo └── twb │ └── v0.3.0 │ ├── LICENSE.txt │ ├── MODEL_CARD.md │ ├── alphabet.txt │ └── swc-tico-test.csv ├── swahili └── coqui │ └── v8.0-large-vocab │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── tamil └── itml │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── tatar └── itml │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── thai └── itml │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── totonac └── bozden │ └── v1.0.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── turkish └── itml │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── ukrainian └── robinhad │ └── v0.4 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── upper-sorbian └── itml │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── welsh └── techiaith │ └── v21.03 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt ├── wolof └── itml │ └── v0.1.0 │ ├── LICENSE │ ├── MODEL_CARD.md │ └── alphabet.txt └── yoruba └── itml └── v0.1.0 ├── LICENSE ├── MODEL_CARD.md └── alphabet.txt /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Pull request 
guidelines 2 | 3 | Welcome to the 🐸STT-models project! We are excited to see your interest, and we appreciate your support! 4 | 5 | This repository is governed by the Contributor Covenant Code of Conduct. For more details, see the [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) file. 6 | 7 | Before accepting your pull request, you will be asked to sign a [Contributor License Agreement](https://cla-assistant.io/coqui-ai/STT-models). 8 | 9 | This [Contributor License Agreement](https://cla-assistant.io/coqui-ai/STT-models): 10 | 11 | - Protects you, Coqui, and the users of the code. 12 | - Does not change your rights to use your contributions for any purpose. 13 | - Does not change the license of the 🐸STT-models project. It just makes the terms of your contribution clearer and lets us know you are OK to contribute. 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pbmm 3 | *.tflite 4 | **/MODEL_CARD 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Coqui STT Models 2 | 3 | This repository tracks releases of open models for 🐸STT. 
4 | 5 | Download models here: [coqui-ai/STT-models/releases](https://github.com/coqui-ai/STT-models/releases) 6 | -------------------------------------------------------------------------------- /amharic/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ሀ 3 | ሁ 4 | ሂ 5 | ሃ 6 | ሄ 7 | ህ 8 | ሆ 9 | ለ 10 | ሉ 11 | ሊ 12 | ላ 13 | ሌ 14 | ል 15 | ሎ 16 | ሏ 17 | ሐ 18 | ሑ 19 | ሒ 20 | ሓ 21 | ሔ 22 | ሕ 23 | ሖ 24 | ሗ 25 | መ 26 | ሙ 27 | ሚ 28 | ማ 29 | ሜ 30 | ም 31 | ሞ 32 | ሟ 33 | ሠ 34 | ሡ 35 | ሢ 36 | ሣ 37 | ሤ 38 | ሥ 39 | ሦ 40 | ሧ 41 | ረ 42 | ሩ 43 | ሪ 44 | ራ 45 | ሬ 46 | ር 47 | ሮ 48 | ሯ 49 | ሰ 50 | ሱ 51 | ሲ 52 | ሳ 53 | ሴ 54 | ስ 55 | ሶ 56 | ሷ 57 | ሸ 58 | ሹ 59 | ሺ 60 | ሻ 61 | ሼ 62 | ሽ 63 | ሾ 64 | ሿ 65 | ቀ 66 | ቁ 67 | ቂ 68 | ቃ 69 | ቄ 70 | ቅ 71 | ቆ 72 | ቈ 73 | ቊ 74 | ቋ 75 | ቌ 76 | ቍ 77 | በ 78 | ቡ 79 | ቢ 80 | ባ 81 | ቤ 82 | ብ 83 | ቦ 84 | ቧ 85 | ቨ 86 | ቩ 87 | ቪ 88 | ቫ 89 | ቬ 90 | ቭ 91 | ቮ 92 | ቯ 93 | ተ 94 | ቱ 95 | ቲ 96 | ታ 97 | ቴ 98 | ት 99 | ቶ 100 | ቷ 101 | ቸ 102 | ቹ 103 | ቺ 104 | ቻ 105 | ቼ 106 | ች 107 | ቾ 108 | ቿ 109 | ኀ 110 | ኁ 111 | ኂ 112 | ኃ 113 | ኄ 114 | ኅ 115 | ኆ 116 | ኈ 117 | ኊ 118 | ኋ 119 | ኌ 120 | ኍ 121 | ነ 122 | ኑ 123 | ኒ 124 | ና 125 | ኔ 126 | ን 127 | ኖ 128 | ኗ 129 | ኘ 130 | ኙ 131 | ኚ 132 | ኛ 133 | ኜ 134 | ኝ 135 | ኞ 136 | ኟ 137 | አ 138 | ኡ 139 | ኢ 140 | ኣ 141 | ኤ 142 | እ 143 | ኦ 144 | ኧ 145 | ከ 146 | ኩ 147 | ኪ 148 | ካ 149 | ኬ 150 | ክ 151 | ኮ 152 | ኰ 153 | ኲ 154 | ኳ 155 | ኴ 156 | ኵ 157 | ኸ 158 | ኹ 159 | ኺ 160 | ኻ 161 | ኼ 162 | ኽ 163 | ኾ 164 | ዀ 165 | ዂ 166 | ዃ 167 | ዄ 168 | ዅ 169 | ወ 170 | ዉ 171 | ዊ 172 | ዋ 173 | ዌ 174 | ው 175 | ዎ 176 | ዐ 177 | ዑ 178 | ዒ 179 | ዓ 180 | ዔ 181 | ዕ 182 | ዖ 183 | ዘ 184 | ዙ 185 | ዚ 186 | ዛ 187 | ዜ 188 | ዝ 189 | ዞ 190 | ዟ 191 | ዠ 192 | ዡ 193 | ዢ 194 | ዣ 195 | ዤ 196 | ዥ 197 | ዦ 198 | ዧ 199 | የ 200 | ዩ 201 | ዪ 202 | ያ 203 | ዬ 204 | ይ 205 | ዮ 206 | ደ 207 | ዱ 208 | ዲ 209 | ዳ 210 | ዴ 211 | ድ 212 | ዶ 213 | ዷ 214 | ጀ 215 | ጁ 216 | ጂ 217 | ጃ 218 | ጄ 219 | ጅ 220 | ጆ 221 | ጇ 222 | ገ 223 | ጉ 224 | ጊ 225 | 
ጋ 226 | ጌ 227 | ግ 228 | ጎ 229 | ጐ 230 | ጒ 231 | ጓ 232 | ጔ 233 | ጕ 234 | ጠ 235 | ጡ 236 | ጢ 237 | ጣ 238 | ጤ 239 | ጥ 240 | ጦ 241 | ጧ 242 | ጨ 243 | ጩ 244 | ጪ 245 | ጫ 246 | ጬ 247 | ጭ 248 | ጮ 249 | ጯ 250 | ጰ 251 | ጱ 252 | ጲ 253 | ጳ 254 | ጴ 255 | ጵ 256 | ጶ 257 | ጷ 258 | ጸ 259 | ጹ 260 | ጺ 261 | ጻ 262 | ጼ 263 | ጽ 264 | ጾ 265 | ጿ 266 | ፀ 267 | ፁ 268 | ፂ 269 | ፃ 270 | ፄ 271 | ፅ 272 | ፆ 273 | ፈ 274 | ፉ 275 | ፊ 276 | ፋ 277 | ፌ 278 | ፍ 279 | ፎ 280 | ፏ 281 | ፐ 282 | ፑ 283 | ፒ 284 | ፓ 285 | ፔ 286 | ፕ 287 | ፖ 288 | ፗ 289 | -------------------------------------------------------------------------------- /basque/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Basque STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Basque / Euskara / `eu` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{basque-stt, author = {Tyers,Francis}, title = {Basque STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-EU-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Basque Language](https://en.wikipedia.org/wiki/Basque_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/eu/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|81.0\%|19.9\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /basque/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | ñ 29 | -------------------------------------------------------------------------------- /basque/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Basque STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Basque / Euskara / `eu` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{basque-stt, author = {Tyers,Francis}, title = {Basque STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-EU-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Basque Language](https://en.wikipedia.org/wiki/Basque_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/eu/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|68.7\%|15.6\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /basque/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | ñ 29 | -------------------------------------------------------------------------------- /bengali/twb/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ঁ 3 | ং 4 | ঃ 5 | অ 6 | আ 7 | ই 8 | ঈ 9 | উ 10 | ঊ 11 | ঋ 12 | এ 13 | ঐ 14 | ও 15 | ঔ 16 | ক 17 | খ 18 | গ 19 | ঘ 20 | ঙ 21 | চ 22 | ছ 23 | জ 24 | ঝ 25 | ঞ 26 | ট 27 | ঠ 28 | ড 29 | ঢ 30 | ণ 31 | ত 32 | থ 33 | দ 34 | ধ 35 | ন 36 | প 37 | ফ 38 | ব 39 | ভ 40 | ম 41 | য 42 | র 43 | ল 44 | শ 45 | ষ 46 | স 47 | হ 48 | ় 49 | া 50 | ি 51 | ী 52 | ু 53 | ূ 54 | ৃ 55 | ে 56 | ৈ 57 | ো 58 | ৌ 59 | ্ 60 | ৎ 61 | ৗ 62 | ড় 63 | ঢ় 64 | য় 65 | ০ 66 | ১ 67 | ২ 68 | ৩ 69 | ৪ 70 | ৫ 71 | ৬ 72 | ৭ 73 | ৮ 74 | ৯ 75 | ৰ 76 | -------------------------------------------------------------------------------- /breton/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Breton STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Breton / Brezhoneg / `br` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{breton-stt, author = {Tyers,Francis}, title = {Breton STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-BR-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Breton Language](https://en.wikipedia.org/wiki/Breton_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|94.9\%|41.6\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /breton/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | - 4 | a 5 | b 6 | c 7 | d 8 | e 9 | f 10 | g 11 | h 12 | i 13 | j 14 | k 15 | l 16 | m 17 | n 18 | o 19 | p 20 | q 21 | r 22 | s 23 | t 24 | u 25 | v 26 | w 27 | x 28 | y 29 | z 30 | â 31 | ê 32 | î 33 | ñ 34 | ô 35 | ù 36 | û 37 | ü 38 | -------------------------------------------------------------------------------- /breton/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Breton STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Breton / Brezhoneg / `br` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{breton-stt, author = {Tyers,Francis}, title = {Breton STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-BR-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Breton Language](https://en.wikipedia.org/wiki/Breton_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/br/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|89.1\%|37.7\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /breton/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | - 4 | a 5 | b 6 | c 7 | d 8 | e 9 | f 10 | g 11 | h 12 | i 13 | j 14 | k 15 | l 16 | m 17 | n 18 | o 19 | p 20 | q 21 | r 22 | s 23 | t 24 | u 25 | v 26 | w 27 | x 28 | y 29 | z 30 | â 31 | ê 32 | î 33 | ñ 34 | ô 35 | ù 36 | û 37 | ü 38 | -------------------------------------------------------------------------------- /catalan/ccoreilly/v0.14.0/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Coqui GmbH 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /catalan/ccoreilly/v0.14.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 5 | 6 | a 7 | à 8 | b 9 | c 10 | ç 11 | d 12 | e 13 | è 14 | é 15 | f 16 | g 17 | h 18 | i 19 | í 20 | ï 21 | j 22 | k 23 | l 24 | m 25 | n 26 | o 27 | ò 28 | ó 29 | p 30 | q 31 | r 32 | s 33 | t 34 | u 35 | ú 36 | ü 37 | v 38 | w 39 | x 40 | y 41 | z 42 | ' 43 | - 44 | · 45 | # The last (non-comment) line needs to end with a newline. 46 | -------------------------------------------------------------------------------- /chatino/bozden/v1.0.0/LICENSE: -------------------------------------------------------------------------------- 1 | https://creativecommons.org/licenses/by-sa/4.0/ 2 | -------------------------------------------------------------------------------- /chatino/bozden/v1.0.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Western Highland Chatino STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Bülent Özden](https://twitter.com/bulentozden), a member of [Common Voice Türkçe](https://twitter.com/CVTurkce). 
17 | - Model language: Western Highland Chatino / `ctp` 18 | - Model date: 12th April, 2022 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v1.0.0` 21 | - Compatible with STT version: `v1.3.0` 22 | - License: CC-BY-SA 4.0 23 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 24 | 25 | ## Intended use 26 | 27 | Speech-to-Text for [Western Highland Chatino](https://en.wikipedia.org/wiki/Highland_Chatino) on 16kHz, mono-channel audio. 28 | 29 | ## Performance Factors 30 | 31 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 32 | 33 | ## Metrics 34 | 35 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 36 | 37 | #### Transcription Accuracy 38 | 39 | |Test Corpus|WER|CER| 40 | |-----------|---|---| 41 | |GORILLA |77.2\%|30.9\%| 42 | 43 | #### Model Size 44 | 45 | `model.tflite`: 46M 46 | 47 | ### Approaches to uncertainty and variability 48 | 49 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 50 | 51 | ## Training data 52 | 53 | This model was trained on [GORILLA `ctp`](https://gorilla.linguistlist.org/code/ctp/) 54 | 55 | ### Citation 56 | 57 | * Malgorzata E. Cavar, Damir Cavar, Hilaria Cruz (2016) "Endangered Language Documentation: Bootstrapping a Chatino Speech Corpus, Forced Aligner, ASR". Pages 4004-4011 Of N. Calzolari (et al. 
eds) *Proceedings of the Tenth International Conference on Language Resources and Evaluation* (LREC 2016) in Portorož, Slovenia, European Language Resources Association (ELRA), Paris, France. 58 | 59 | ## Evaluation data 60 | 61 | This model was evaluated on [GORILLA `ctp`](https://gorilla.linguistlist.org/code/ctp/) 62 | 63 | ## Ethical considerations 64 | 65 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 66 | 67 | ### Demographic Bias 68 | 69 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 70 | 71 | ### Surveillance 72 | 73 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyse private speech. 74 | 75 | ## Caveats and recommendations 76 | 77 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 78 | 79 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
80 | -------------------------------------------------------------------------------- /chatino/bozden/v1.0.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric index. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' in the Alphabet. 5 | 6 | 0 7 | 1 8 | 2 9 | 3 10 | 4 11 | a 12 | b 13 | c 14 | d 15 | e 16 | f 17 | g 18 | h 19 | i 20 | j 21 | k 22 | l 23 | m 24 | n 25 | o 26 | p 27 | q 28 | r 29 | s 30 | t 31 | u 32 | v 33 | w 34 | x 35 | y 36 | z 37 | ñ 38 | õ 39 | ʼ 40 | # The last (non-comment) line needs to end with a newline. 41 | -------------------------------------------------------------------------------- /chuvash/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Chuvash STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Chuvash / Чӑвашла / `cv` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{chuvash-stt, author = {Tyers,Francis}, title = {Chuvash STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-CV-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Chuvash Language](https://en.wikipedia.org/wiki/Chuvash_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|97.0\%|36.9\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /chuvash/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | - 3 | а 4 | б 5 | в 6 | г 7 | д 8 | е 9 | ж 10 | з 11 | и 12 | й 13 | к 14 | л 15 | м 16 | н 17 | о 18 | п 19 | р 20 | с 21 | т 22 | у 23 | ф 24 | х 25 | ц 26 | ч 27 | ш 28 | щ 29 | ъ 30 | ы 31 | ь 32 | э 33 | ю 34 | я 35 | ё 36 | ҫ 37 | ӑ 38 | ӗ 39 | ӳ 40 | -------------------------------------------------------------------------------- /chuvash/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Chuvash STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Chuvash / Чӑвашла / `cv` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{chuvash-stt, author = {Tyers,Francis}, title = {Chuvash STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-CV-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Chuvash Language](https://en.wikipedia.org/wiki/Chuvash_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|95.4\%|33.7\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /chuvash/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | - 3 | а 4 | б 5 | в 6 | г 7 | д 8 | е 9 | ж 10 | з 11 | и 12 | й 13 | к 14 | л 15 | м 16 | н 17 | о 18 | п 19 | р 20 | с 21 | т 22 | у 23 | ф 24 | х 25 | ц 26 | ч 27 | ш 28 | щ 29 | ъ 30 | ы 31 | ь 32 | э 33 | ю 34 | я 35 | ё 36 | ҫ 37 | ӑ 38 | ӗ 39 | ӳ 40 | -------------------------------------------------------------------------------- /czech/comodoro/v0.1.0/LICENSE: -------------------------------------------------------------------------------- 1 | https://creativecommons.org/licenses/by-nc/4.0/legalcode 2 | -------------------------------------------------------------------------------- /czech/comodoro/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | - 3 | E 4 | I 5 | L 6 | N 7 | a 8 | b 9 | c 10 | d 11 | e 12 | f 13 | g 14 | h 15 | i 16 | j 17 | k 18 | l 19 | m 20 | n 21 | o 22 | p 23 | q 24 | r 25 | s 26 | t 27 | u 28 | v 29 | w 30 | x 31 | y 32 | z 33 | | 34 | á 35 | é 36 | í 37 | ó 38 | ú 39 | ü 40 | ý 41 | č 42 | ď 43 | ě 44 | ň 45 | ř 46 | š 47 | ť 48 | ů 49 | ž 50 | -------------------------------------------------------------------------------- /czech/comodoro/v0.2.0/LICENSE: -------------------------------------------------------------------------------- 1 | https://creativecommons.org/licenses/by-nc/4.0/legalcode 2 | -------------------------------------------------------------------------------- /czech/comodoro/v0.2.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | á 29 | é 30 | í 31 | ó 32 | ú 33 | ý 34 | č 35 | ď 36 | ě 37 | ň 38 | ř 39 | š 40 | ť 41 | ů 
42 | ž 43 | -------------------------------------------------------------------------------- /czech/comodoro/v0.3.0/LICENSE: -------------------------------------------------------------------------------- 1 | https://creativecommons.org/licenses/by-nc/4.0/legalcode 2 | -------------------------------------------------------------------------------- /czech/comodoro/v0.3.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | á 29 | é 30 | í 31 | ó 32 | ú 33 | ý 34 | č 35 | ď 36 | ě 37 | ň 38 | ř 39 | š 40 | ť 41 | ů 42 | ž 43 | -------------------------------------------------------------------------------- /dhivehi/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Dhivehi STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Dhivehi / ދިވެހި / `dv` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{dhivehi-stt, author = {Tyers,Francis}, title = {Dhivehi STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-DV-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Dhivehi Language](https://en.wikipedia.org/wiki/Dhivehi_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|94.7\%|33.0\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /dhivehi/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ހ 3 | ށ 4 | ނ 5 | ރ 6 | ބ 7 | ޅ 8 | ކ 9 | އ 10 | ވ 11 | މ 12 | ފ 13 | ދ 14 | ތ 15 | ލ 16 | ގ 17 | ޏ 18 | ސ 19 | ޑ 20 | ޒ 21 | ޓ 22 | ޔ 23 | ޕ 24 | ޖ 25 | ޗ 26 | ޘ 27 | ޙ 28 | ޚ 29 | ޛ 30 | ޜ 31 | ޝ 32 | ޞ 33 | ޟ 34 | ޠ 35 | ޡ 36 | ޢ 37 | ޣ 38 | ޤ 39 | ޥ 40 | ަ 41 | ާ 42 | ި 43 | ީ 44 | ު 45 | ޫ 46 | ެ 47 | ޭ 48 | ޮ 49 | ޯ 50 | ް 51 | -------------------------------------------------------------------------------- /dhivehi/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Dhivehi STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Dhivehi / ދިވެހި / `dv` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{dhivehi-stt, author = {Tyers,Francis}, title = {Dhivehi STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-DV-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Dhivehi Language](https://en.wikipedia.org/wiki/Dhivehi_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|91.2\%|29.3\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /dhivehi/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ހ 3 | ށ 4 | ނ 5 | ރ 6 | ބ 7 | ޅ 8 | ކ 9 | އ 10 | ވ 11 | މ 12 | ފ 13 | ދ 14 | ތ 15 | ލ 16 | ގ 17 | ޏ 18 | ސ 19 | ޑ 20 | ޒ 21 | ޓ 22 | ޔ 23 | ޕ 24 | ޖ 25 | ޗ 26 | ޘ 27 | ޙ 28 | ޚ 29 | ޛ 30 | ޜ 31 | ޝ 32 | ޞ 33 | ޟ 34 | ޠ 35 | ޡ 36 | ޢ 37 | ޣ 38 | ޤ 39 | ޥ 40 | ަ 41 | ާ 42 | ި 43 | ީ 44 | ު 45 | ޫ 46 | ެ 47 | ޭ 48 | ޮ 49 | ޯ 50 | ް 51 | -------------------------------------------------------------------------------- /dutch/acabunoc/v0.0.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Dutch STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally released by [Abigail Cabunoc Mayes](https://github.com/acabunoc). 
17 | - Model language: Dutch / Nederlands / `nl` 18 | - Model date: July 12, 2020 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.0.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: MPL 2.0 23 | - Citation details: `@techreport{dutch-stt, author = {Cabunoc Mayes,Abigail}, title = {Dutch STT 0.0.1}, institution = {Coqui}, address = {\url{https://coqui.ai/models}} year = {2020}, month = {July}, number = {STT-CV-NL-0.0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Dutch Language](https://en.wikipedia.org/wiki/Dutch_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported *using a language model*: [Github](https://github.com/acabunoc/Tutorial-train-dutch-model). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice 5.1|87.8\%|65.3\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 660K 55 | `model.tflite`: 221K 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 5.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 5.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /dutch/acabunoc/v0.0.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | ### Reading in the following transcript files: ### 2 | ### ['/content/nl/cv-corpus-5-2020-06-22/nl/clips/train.csv', '/content/nl/cv-corpus-5-2020-06-22/nl/clips/dev.csv', '/content/nl/cv-corpus-5-2020-06-22/nl/clips/test.csv'] ### 3 | ### The following unique characters were found in your transcripts: ### 4 | s 5 | 6 | j 7 | ç 8 | q 9 | é 10 | f 11 | i 12 | — 13 | h 14 | … 15 | m 16 | g 17 | u 18 | “ 19 | û 20 | p 21 | x 22 | ó 23 | ï 24 | ’ 25 | ê 26 | ü 27 | à 28 | a 29 | k 30 | c 31 | v 32 | t 33 | ' 34 | w 35 | n 36 | d 37 | e 38 | y 39 | ë 40 | r 41 | z 42 | ” 43 | ö 44 | î 45 | l 46 | o 47 | è 48 | b 49 | ### ^^^ You can copy-paste these into data/alphabet.txt ### 50 | -------------------------------------------------------------------------------- /english/coqui/v0.9.3/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for English STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Maintained by [Coqui](https://coqui.ai/). 
17 | - Model language: English / English / `en` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.9.3` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: MPL 2.0 23 | - Citation details: `@techreport{english-stt, author = {Coqui}, title = {English STT 0.9.3}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-EN-0.9.3} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [English Language](https://en.wikipedia.org/wiki/English_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | More detail on model training and evaluation can be found in the [release notes](https://github.com/coqui-ai/STT/releases/tag/v0.9.3). 41 | 42 | #### Real-Time Factor 43 | 44 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
45 | 46 | Recorded average RTF on laptop CPU: `0.66` 47 | 48 | #### Model Size 49 | 50 | `model.pbmm`: 181M 51 | `model.tflite`: 46M 52 | 53 | ### Approaches to uncertainty and variability 54 | 55 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 56 | 57 | ## Training data 58 | 59 | This model was trained on the following corpora: Fisher, LibriSpeech, Switchboard, Common Voice English, and 1,700 hours of transcribed NPR (WAMU) radio shows explicitly licensed to use as training corpora. 60 | 61 | ## Evaluation data 62 | 63 | The Model was evaluated on the LibriSpeech clean dev corpus as validation data, and LibriSpeech clean test as testing data. 64 | 65 | ## Ethical considerations 66 | 67 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 68 | 69 | ### Demographic Bias 70 | 71 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 72 | 73 | ### Surveillance 74 | 75 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 76 | 77 | ## Caveats and recommendations 78 | 79 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 
80 | 81 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 82 | -------------------------------------------------------------------------------- /english/coqui/v0.9.3/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | ' 33 | # The last (non-comment) line needs to end with a newline. 34 | -------------------------------------------------------------------------------- /english/coqui/v1.0.0-checkpoints/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | ' 33 | # The last (non-comment) line needs to end with a newline. 
34 | -------------------------------------------------------------------------------- /english/coqui/v1.0.0-digits/digits.scorer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coqui-ai/STT-models/3b6b7d1b7066b419f48b508ba7436952158126fa/english/coqui/v1.0.0-digits/digits.scorer -------------------------------------------------------------------------------- /english/coqui/v1.0.0-huge-vocab/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for English STT v1.0.0 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Maintained by [Coqui](https://coqui.ai/). 17 | - Model language: English / English / `en` 18 | - Model date: October 3, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v1.0.0` 21 | - Compatible with 🐸 STT version: `v1.0.0` 22 | - License: Apache 2.0 23 | - Citation details: `@techreport{english-stt, author = {Coqui}, title = {English STT v1.0.0}, institution = {Coqui}, address = {\url{https://coqui.ai/models}} year = {2021}, month = {October}, number = {STT-EN-1.0.0} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT` issues](https://github.com/coqui-ai/STT/issues), open a new discussion on [`STT` discussions](https://github.com/coqui-ai/STT/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [English Language](https://en.wikipedia.org/wiki/English_language) on 16kHz, mono-channel audio. 
29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | Using the `huge-vocabulary.scorer` language model: 41 | 42 | - Librispeech clean: WER: 4.5\%, CER: 1.6\% 43 | - Librispeech other: WER: 13.6\%, CER: 6.4\% 44 | 45 | #### Model Size 46 | 47 | For STT, you always must deploy an acoustic model, and it is often the case you also will want to deploy an application-specific language model. 48 | 49 | |Model type|Vocabulary|Filename|Size| 50 | |----------------|-----|----------------|-----| 51 | |Acoustic model | open | `model.tflite` | 181M| 52 | |Language model | large | `huge-vocabulary.scorer` |923M| 53 | 54 | ### Approaches to uncertainty and variability 55 | 56 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 57 | 58 | ## Training data 59 | 60 | This model was trained on the following corpora: Common Voice 7.0 English (custom Coqui train/dev/test splits), LibriSpeech, and Multilingual Librispeech. In total approximately ~47,000 hours of data. 61 | 62 | ## Evaluation data 63 | 64 | The validation ("dev") sets came from CV, Librispeech, and MLS. 65 | 66 | ## Ethical considerations 67 | 68 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 69 | 70 | ### Demographic Bias 71 | 72 | You should assume every machine learning model has demographic bias unless proven otherwise. 
For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 73 | 74 | ### Surveillance 75 | 76 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 77 | 78 | ## Caveats and recommendations 79 | 80 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 81 | 82 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 83 | -------------------------------------------------------------------------------- /english/coqui/v1.0.0-huge-vocab/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | ' 33 | # The last (non-comment) line needs to end with a newline. 
34 | -------------------------------------------------------------------------------- /english/coqui/v1.0.0-large-vocab/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | ' 33 | # The last (non-comment) line needs to end with a newline. 34 | -------------------------------------------------------------------------------- /english/coqui/v1.0.0-yesno/yesno.scorer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coqui-ai/STT-models/3b6b7d1b7066b419f48b508ba7436952158126fa/english/coqui/v1.0.0-yesno/yesno.scorer -------------------------------------------------------------------------------- /english/coqui/yesno-v0.0.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for English yesno STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Maintained by [Coqui](https://coqui.ai/). 
17 | - Model language: English / English / `en` 18 | - Model date: July 26, 2021 19 | - Model type: `Speech-to-Text` / `constrained vocabulary` / `yesno` 20 | - Model version: `v0.0.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: Apache 2.0 23 | - Citation details: `@techreport{english-yesno-stt, author = {Coqui}, title = {English yesno STT v0.0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {July}, number = {STT-EN-YESNO-0.0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text `yesno` model for the [English Language](https://en.wikipedia.org/wiki/English_language) on 16kHz, mono-channel audio. This model has been trained to only recognize the two words "yes" and "no" in English. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The model was trained and evaluted on the Common Voice Target Segments Corpus, specifically, only on "yes" and "no" audio clips. 
41 | 42 | |Test Corpus|Word Error Rate| 43 | |-------|----------| 44 | |Common Voice 6.1 (Target Segments Corpus "yes" and "no") | 1.6\% | 45 | 46 | #### Model Size 47 | 48 | `yesno.pbmm`: 319K 49 | `yesno.scorer`: 1.7K 50 | 51 | ### Approaches to uncertainty and variability 52 | 53 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 54 | 55 | ## Training data 56 | 57 | The model was trained and evaluated on the Common Voice Target Segments Corpus, specifically, only on "yes" and "no" audio clips. 58 | 59 | ## Evaluation data 60 | 61 | The model was trained and evaluated on the Common Voice Target Segments Corpus, specifically, only on "yes" and "no" audio clips. 62 | 63 | ## Ethical considerations 64 | 65 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 66 | 67 | ### Demographic Bias 68 | 69 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 70 | 71 | ### Surveillance 72 | 73 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 74 | 75 | ## Caveats and recommendations 76 | 77 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 
78 | 79 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 80 | -------------------------------------------------------------------------------- /english/coqui/yesno-v0.0.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | y 2 | e 3 | s 4 | n 5 | o 6 | # 7 | -------------------------------------------------------------------------------- /estonian/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Estonian STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Estonian / Eesti / `et` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{estonian-stt, author = {Tyers,Francis}, title = {Estonian STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-ET-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Estonian Language](https://en.wikipedia.org/wiki/Estonian_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|92.2\%|29.5\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /estonian/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | ä 29 | õ 30 | ö 31 | ü 32 | š 33 | ž 34 | ̇ 35 | -------------------------------------------------------------------------------- /estonian/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Estonian STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Estonian / Eesti / `et` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{estonian-stt, author = {Tyers,Francis}, title = {Estonian STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-ET-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Estonian Language](https://en.wikipedia.org/wiki/Estonian_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|89.1\%|27.0\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /estonian/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | ä 29 | õ 30 | ö 31 | ü 32 | š 33 | ž 34 | ̇ 35 | -------------------------------------------------------------------------------- /finnish/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Finnish STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Finnish / Suomi / `fi` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{finnish-stt, author = {Tyers,Francis}, title = {Finnish STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-FI-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Finnish Language](https://en.wikipedia.org/wiki/Finnish_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|99.7\%|39.1\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /finnish/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | x 25 | y 26 | z 27 | ä 28 | ö 29 | -------------------------------------------------------------------------------- /finnish/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Finnish STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Finnish / Suomi / `fi` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{finnish-stt, author = {Tyers,Francis}, title = {Finnish STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-FI-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Finnish Language](https://en.wikipedia.org/wiki/Finnish_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|96.6\%|30.7\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /finnish/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | x 25 | y 26 | z 27 | ä 28 | ö 29 | -------------------------------------------------------------------------------- /french/commonvoice-fr/v0.6/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | a 4 | b 5 | c 6 | d 7 | e 8 | f 9 | g 10 | h 11 | i 12 | j 13 | k 14 | l 15 | m 16 | n 17 | o 18 | p 19 | q 20 | r 21 | s 22 | t 23 | u 24 | v 25 | w 26 | x 27 | y 28 | z 29 | ~ 30 | ® 31 | à 32 | á 33 | â 34 | ã 35 | ç 36 | è 37 | é 38 | ê 39 | ë 40 | í 41 | î 42 | ï 43 | ñ 44 | ò 45 | ó 46 | ô 47 | ö 48 | ù 49 | û 50 | ü 51 | ď 52 | ĩ 53 | ĺ 54 | ń 55 | ō 56 | œ 57 | ţ 58 | ũ 59 | ū 60 | ů 61 | ǎ 62 | ǔ 63 | ɔ 64 | ɛ 65 | ̐ 66 | ̲ 67 | д 68 | л 69 | п 70 | р 71 | і 72 | ј 73 | գ 74 | զ 75 | ẵ 76 | ề 77 | ờ 78 | ủ 79 | ‐ 80 | ― 81 | ₽ 82 | → 83 | ∆ 84 | − 85 | ∨ 86 | ─ 87 | ꝑ 88 | ÿ 89 | -------------------------------------------------------------------------------- /french/commonvoice-fr/v0.8/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | a 4 | b 5 | c 6 | d 7 | e 8 | f 9 | g 10 | h 11 | i 12 | j 13 | k 14 | l 15 | m 16 | n 17 | o 18 | p 19 | q 20 | r 21 | s 22 | t 23 | u 24 | v 25 | w 26 | x 27 | y 28 | z 29 | ~ 30 | ® 31 | à 32 | á 33 | â 34 | ã 35 | ç 36 | è 37 | é 38 | ê 39 | ë 40 | í 41 | î 42 | ï 43 | ñ 44 | ò 45 | ó 46 | ô 47 | ö 48 | ù 49 | û 50 | ü 51 | ď 52 | ĩ 53 | ĺ 54 | ń 55 | ō 56 | œ 57 | ţ 58 | ũ 59 | ū 60 | ŭ 61 | ů 62 | ű 63 | ŵ 64 | ǎ 65 | ǔ 66 | ɑ 67 | ɨ 68 | ʋ 69 | θ 70 | φ 71 | о 72 | п 73 | р 74 | ц 75 | ч 76 | э 77 | і 78 | ј 79 | џ 80 | ӌ 81 | գ 82 | զ 83 | ḥ 84 | ẓ 85 | ẵ 86 | 
ế 87 | ề 88 | ố 89 | ớ 90 | ờ 91 | ụ 92 | ủ 93 | ứ 94 | ‐ 95 | ― 96 | ₽ 97 | ∆ 98 | − 99 | ∨ 100 | ⊨ 101 | ⋅ 102 | ⱅ 103 | ⱎ 104 | 三 105 | 保 106 | 厳 107 | 宇 108 | 津 109 | ꝑ 110 | ÿ 111 | -------------------------------------------------------------------------------- /french/commonvoice-fr/v0.9/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric index. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' in the Alphabet. 5 | 6 | ' 7 | a 8 | b 9 | c 10 | d 11 | e 12 | f 13 | g 14 | h 15 | i 16 | j 17 | k 18 | l 19 | m 20 | n 21 | o 22 | p 23 | q 24 | r 25 | s 26 | t 27 | u 28 | v 29 | w 30 | x 31 | y 32 | z 33 | ~ 34 | ® 35 | à 36 | á 37 | â 38 | ã 39 | ç 40 | è 41 | é 42 | ê 43 | ë 44 | í 45 | î 46 | ï 47 | ñ 48 | ò 49 | ó 50 | ô 51 | ö 52 | ù 53 | û 54 | ü 55 | ď 56 | ĩ 57 | ĺ 58 | ń 59 | ō 60 | œ 61 | ţ 62 | ũ 63 | ū 64 | ŭ 65 | ů 66 | ű 67 | ŵ 68 | ǎ 69 | ǔ 70 | ɑ 71 | ɨ 72 | ʋ 73 | θ 74 | φ 75 | о 76 | п 77 | ц 78 | ч 79 | э 80 | і 81 | ј 82 | џ 83 | ӌ 84 | գ 85 | զ 86 | ḥ 87 | ẓ 88 | ẵ 89 | ế 90 | ề 91 | ố 92 | ớ 93 | ờ 94 | ụ 95 | ủ 96 | ứ 97 | ‐ 98 | ― 99 | ’ 100 | ₽ 101 | ∆ 102 | − 103 | ∨ 104 | ⋅ 105 | ⠈ 106 | ꝑ 107 | ÿ 108 | # The last (non-comment) line needs to end with a newline. -------------------------------------------------------------------------------- /french/jaco-assistant/v0.0.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 
5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | ' 33 | # The last (non-comment) line needs to end with a newline. 34 | -------------------------------------------------------------------------------- /frisian/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Frisian STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 17 | - Model language: Frisian / Frysk / `fy-NL` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{frisian-stt, author = {Tyers,Francis}, title = {Frisian STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-FY_NL-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 
25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Frisian Language](https://en.wikipedia.org/wiki/Frisian_languages) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|79.6\%|29.9\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. 
For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /frisian/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | a 4 | b 5 | c 6 | d 7 | e 8 | f 9 | g 10 | h 11 | i 12 | j 13 | k 14 | l 15 | m 16 | n 17 | o 18 | p 19 | q 20 | r 21 | s 22 | t 23 | u 24 | v 25 | w 26 | x 27 | y 28 | z 29 | â 30 | é 31 | ê 32 | ô 33 | ú 34 | û 35 | -------------------------------------------------------------------------------- /frisian/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Frisian STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Frisian / Frysk / `fy-NL` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{frisian-stt, author = {Tyers,Francis}, title = {Frisian STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}}, year = {2021}, month = {April}, number = {STT-CV6.1-FY_NL-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Frisian Language](https://en.wikipedia.org/wiki/Frisian_languages) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|74.0\%|26.5\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /frisian/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | a 4 | b 5 | c 6 | d 7 | e 8 | f 9 | g 10 | h 11 | i 12 | j 13 | k 14 | l 15 | m 16 | n 17 | o 18 | p 19 | q 20 | r 21 | s 22 | t 23 | u 24 | v 25 | w 26 | x 27 | y 28 | z 29 | â 30 | é 31 | ê 32 | ô 33 | ú 34 | û 35 | -------------------------------------------------------------------------------- /georgian/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ა 3 | ბ 4 | გ 5 | დ 6 | ე 7 | ვ 8 | ზ 9 | თ 10 | ი 11 | კ 12 | ლ 13 | მ 14 | ნ 15 | ო 16 | პ 17 | ჟ 18 | რ 19 | ს 20 | ტ 21 | უ 22 | ფ 23 | ქ 24 | ღ 25 | ყ 26 | შ 27 | ჩ 28 | ც 29 | ძ 30 | წ 31 | ჭ 32 | ხ 33 | ჯ 34 | ჰ 35 | -------------------------------------------------------------------------------- /georgian/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ა 3 | ბ 4 | გ 5 | დ 6 | ე 7 | ვ 8 | ზ 9 | თ 10 | ი 11 | კ 12 | ლ 13 | მ 14 | ნ 15 | ო 16 | პ 17 | ჟ 18 | რ 19 | ს 20 | ტ 21 | უ 22 | ფ 23 | ქ 24 | ღ 25 | ყ 26 | შ 27 | ჩ 28 | ც 29 | ძ 30 | წ 31 | ჭ 32 | ხ 33 | ჯ 34 | ჰ 35 | -------------------------------------------------------------------------------- /german/aashishag/v0.9.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | ä 33 | ö 34 | ü 35 | ' 36 | # The last (non-comment) line needs to end with a newline. 
37 | -------------------------------------------------------------------------------- /german/jaco-assistant/v0.0.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | ' 33 | # The last (non-comment) line needs to end with a newline. 34 | -------------------------------------------------------------------------------- /german/yoummday/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for German STT 2 |

3 | 4 |

Yoummday (The world’s first online marketplace for contact centers) puts its German speech model in your hands.

5 |

6 | 7 | Jump to section: 8 | 9 | - [Model details](#model-details) 10 | - [Intended use](#intended-use) 11 | - [Training data](#training-data) 12 | - [Evaluation data](#evaluation-data) 13 | - [Ethical considerations and disclaimer](#ethical-considerations-and-disclaimer) 14 | - [About Yoummday](#about-yoummday) 15 | 16 | ## Model details 17 | 18 | - Organization developing the model: trained by [Yoummday GmbH](https://www.yoummday.com/en/). 19 | - Model date: December 10, 2021 20 | - Model type: `Speech-to-Text` 21 | - Model version: `v0.1.0` 22 | - Compatible with 🐸 STT version: `v0.9.3` 23 | - Size: 24 | - `model.pbmm` -> 181M 25 | - `kenlm.scorer` -> 411M 26 | - License: [Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) ](https://creativecommons.org/licenses/by-nc/4.0/) 27 | 28 | ## Intended use 29 | 30 | The model is trained to work with Yoummday's telephone audio-data, hence the **8kHz** constraint. However, the model can be used in any Speech-To-Text (STT) application for the [German Language](https://en.wikipedia.org/wiki/German_language) on **8kHz, mono-channel audios**. 31 | 32 | ## Training data 33 | 34 | The following datasets were used during the training process: 35 | 36 | - Common Voice 37 | - Voxforge 38 | - Librivox 39 | - Forscher 40 | - Tatoeba 41 | - Tuda 42 | - Zamia 43 | - YouTube data. 44 | - Yoummday's data. 45 | 46 | ## Ethical considerations and disclaimer 47 | - Deploying a STT model has various ethical implications. Therefore, you should consider those implications alongside the privacy rules in your region before use. 48 | - Yoummday does not assume any liability nor responsibility for any misuse of the provided STT model. 49 | - Yoummday reserves the right to take legal actions against any misuse that might in any way negatively affect it. 50 | 51 | 52 | ## About Yoummday 53 | - Yoummday‘s concept is based on the sharing economy‘s principles. 
Among an innovative software solution, the marketplace is matching clients and freelancing call center agents whom we call talents. Through the concept, our platform provides a solution to work as a freelancing talent globally. Moreover, the provided technology enables the possibility for the talents to work from home. 54 | - *More about Yoummday can be found [here](https://www.yoummday.com/en/company).* 55 | 56 | 57 | 58 |

59 | 60 |

61 | -------------------------------------------------------------------------------- /german/yoummday/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | #start 2 | 3 | ä 4 | ü 5 | ö 6 | ß 7 | a 8 | b 9 | c 10 | d 11 | e 12 | f 13 | g 14 | h 15 | i 16 | j 17 | k 18 | l 19 | m 20 | n 21 | o 22 | p 23 | q 24 | r 25 | s 26 | t 27 | u 28 | v 29 | w 30 | x 31 | y 32 | z 33 | ' 34 | #end 35 | -------------------------------------------------------------------------------- /greek/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Greek STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Greek / Ελληνικά / `el` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{greek-stt, author = {Tyers,Francis}, title = {Greek STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-EL-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Greek Language](https://en.wikipedia.org/wiki/Greek_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|88.1\%|36.3\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /greek/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ΐ 3 | ά 4 | έ 5 | ή 6 | ί 7 | α 8 | β 9 | γ 10 | δ 11 | ε 12 | ζ 13 | η 14 | θ 15 | ι 16 | κ 17 | λ 18 | μ 19 | ν 20 | ξ 21 | ο 22 | π 23 | ρ 24 | ς 25 | σ 26 | τ 27 | υ 28 | φ 29 | χ 30 | ψ 31 | ω 32 | ϊ 33 | ϋ 34 | ό 35 | ύ 36 | ώ 37 | -------------------------------------------------------------------------------- /greek/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Greek STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Greek / Ελληνικά / `el` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{greek-stt, author = {Tyers,Francis}, title = {Greek STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}}, year = {2021}, month = {April}, number = {STT-CV6.1-EL-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Greek Language](https://en.wikipedia.org/wiki/Greek_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/cv/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|80.2\%|31.2\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be misused to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /greek/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ΐ 3 | ά 4 | έ 5 | ή 6 | ί 7 | α 8 | β 9 | γ 10 | δ 11 | ε 12 | ζ 13 | η 14 | θ 15 | ι 16 | κ 17 | λ 18 | μ 19 | ν 20 | ξ 21 | ο 22 | π 23 | ρ 24 | ς 25 | σ 26 | τ 27 | υ 28 | φ 29 | χ 30 | ψ 31 | ω 32 | ϊ 33 | ϋ 34 | ό 35 | ύ 36 | ώ 37 | -------------------------------------------------------------------------------- /hakha-chin/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | y 26 | z 27 | ṭ 28 | -------------------------------------------------------------------------------- /hakha-chin/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | y 26 | z 27 | ṭ 28 | -------------------------------------------------------------------------------- /hindi/bozden/v0.8.99/LICENSE: -------------------------------------------------------------------------------- 1 | https://creativecommons.org/licenses/by-nc-sa/3.0/ -------------------------------------------------------------------------------- /hindi/bozden/v0.8.99/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric index. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' in the Alphabet. 
5 | 6 | ँ 7 | ं 8 | ः 9 | अ 10 | आ 11 | इ 12 | ई 13 | उ 14 | ऊ 15 | ए 16 | ऐ 17 | ऑ 18 | ओ 19 | औ 20 | क 21 | ख 22 | ग 23 | घ 24 | ङ 25 | च 26 | छ 27 | ज 28 | झ 29 | ञ 30 | ट 31 | ठ 32 | ड 33 | ढ 34 | ण 35 | त 36 | थ 37 | द 38 | ध 39 | न 40 | प 41 | फ 42 | ब 43 | भ 44 | म 45 | य 46 | र 47 | ल 48 | व 49 | श 50 | ष 51 | स 52 | ह 53 | ऺ 54 | ऻ 55 | ़ 56 | ऽ 57 | ा 58 | ि 59 | ी 60 | ु 61 | ू 62 | ृ 63 | ॄ 64 | ॅ 65 | ॆ 66 | े 67 | ै 68 | ॉ 69 | ॊ 70 | ो 71 | ौ 72 | ् 73 | ॎ 74 | ॏ 75 | ऋ 76 | # The last (non-comment) line needs to end with a newline. 77 | -------------------------------------------------------------------------------- /hungarian/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | á 29 | é 30 | í 31 | ó 32 | ö 33 | ú 34 | ü 35 | ő 36 | ű 37 | -------------------------------------------------------------------------------- /hungarian/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | á 29 | é 30 | í 31 | ó 32 | ö 33 | ú 34 | ü 35 | ő 36 | ű 37 | -------------------------------------------------------------------------------- /indonesian/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | r 19 | s 20 | t 21 | u 22 | v 23 | w 24 | x 25 | y 26 | z 27 | -------------------------------------------------------------------------------- /indonesian/itml/v0.1.1/alphabet.txt: 
-------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | r 19 | s 20 | t 21 | u 22 | v 23 | w 24 | x 25 | y 26 | z 27 | -------------------------------------------------------------------------------- /irish/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Irish STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 17 | - Model language: Irish / Gaeilge / `ga-IE` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{irish-stt, author = {Tyers,Francis}, title = {Irish STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-GA_IE-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 
25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Irish Language](https://en.wikipedia.org/wiki/Irish_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/ga-IE/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|94.3\%|57.7\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. 
For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be misused to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /irish/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | - 4 | a 5 | b 6 | c 7 | d 8 | e 9 | f 10 | g 11 | h 12 | i 13 | j 14 | k 15 | l 16 | m 17 | n 18 | o 19 | p 20 | r 21 | s 22 | t 23 | u 24 | v 25 | w 26 | x 27 | y 28 | á 29 | é 30 | í 31 | ó 32 | ú 33 | -------------------------------------------------------------------------------- /irish/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Irish STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Irish / Gaeilge / `ga-IE` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{irish-stt, author = {Tyers,Francis}, title = {Irish STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}}, year = {2021}, month = {April}, number = {STT-CV6.1-GA_IE-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Irish Language](https://en.wikipedia.org/wiki/Irish_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/ga-IE/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|86.9\%|40.6\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be misused to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /irish/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | - 4 | a 5 | b 6 | c 7 | d 8 | e 9 | f 10 | g 11 | h 12 | i 13 | j 14 | k 15 | l 16 | m 17 | n 18 | o 19 | p 20 | r 21 | s 22 | t 23 | u 24 | v 25 | w 26 | x 27 | y 28 | á 29 | é 30 | í 31 | ó 32 | ú 33 | -------------------------------------------------------------------------------- /italian/jaco-assistant/v0.0.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | ' 33 | # The last (non-comment) line needs to end with a newline. 
34 | -------------------------------------------------------------------------------- /italian/mozillaitalia/v2020.8.7/LICENSE: -------------------------------------------------------------------------------- 1 | https://creativecommons.org/publicdomain/zero/1.0/legalcode 2 | -------------------------------------------------------------------------------- /italian/mozillaitalia/v2020.8.7/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | a 4 | b 5 | c 6 | d 7 | e 8 | f 9 | g 10 | h 11 | i 12 | j 13 | k 14 | l 15 | m 16 | n 17 | o 18 | p 19 | q 20 | r 21 | s 22 | t 23 | u 24 | v 25 | w 26 | x 27 | y 28 | z 29 | à 30 | è 31 | é 32 | ì 33 | í 34 | ò 35 | ó 36 | ô 37 | ù 38 | ú 39 | -------------------------------------------------------------------------------- /kinyarwanda/digital-umuganda/v0.0.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | ' 33 | # The last (non-comment) line needs to end with a newline. 34 | -------------------------------------------------------------------------------- /komi/itml/v0.0.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 
5 | р 6 | ь 7 | щ 8 | ; 9 | э 10 | г 11 | ʼ 12 | ӯ 13 | е 14 | ) 15 | т 16 | ӵ 17 | с 18 | \# 19 | ч 20 | о 21 | ̄ 22 | « 23 | ѕ 24 | а 25 | » 26 | п 27 | ! 28 | я 29 | ж 30 | н 31 | ъ 32 | в 33 | к 34 | у 35 | : 36 | ш 37 | л 38 | ӈ 39 | ы 40 | ё 41 | ц 42 | ӥ 43 | б 44 | х 45 | / 46 | ӧ 47 | і 48 | й 49 | ā 50 | – 51 | — 52 | ˮ 53 | ф 54 | д 55 | м 56 | и 57 | 58 | з 59 | “ 60 | ю 61 | ### ^^^ You can copy-paste these into data/alphabet.txt ### 62 | -------------------------------------------------------------------------------- /kyrgyz/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Kyrgyz STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Kyrgyz / Кыргызча / `ky` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{kyrgyz-stt, author = {Tyers,Francis}, title = {Kyrgyz STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}}, year = {2021}, month = {April}, number = {STT-CV6.1-KY-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Kyrgyz Language](https://en.wikipedia.org/wiki/Kyrgyz_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/ky/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|94.1\%|36.8\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be misused to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /kyrgyz/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | а 3 | б 4 | в 5 | г 6 | д 7 | е 8 | ж 9 | з 10 | и 11 | й 12 | к 13 | л 14 | м 15 | н 16 | о 17 | п 18 | р 19 | с 20 | т 21 | у 22 | ф 23 | х 24 | ц 25 | ч 26 | ш 27 | щ 28 | ъ 29 | ы 30 | ь 31 | э 32 | ю 33 | я 34 | ё 35 | ң 36 | ү 37 | ө 38 | -------------------------------------------------------------------------------- /kyrgyz/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Kyrgyz STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Kyrgyz / Кыргызча / `ky` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{kyrgyz-stt, author = {Tyers,Francis}, title = {Kyrgyz STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}}, year = {2021}, month = {April}, number = {STT-CV6.1-KY-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Kyrgyz Language](https://en.wikipedia.org/wiki/Kyrgyz_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/ky/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|87.1\%|30.5\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be misused to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /kyrgyz/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | а 3 | б 4 | в 5 | г 6 | д 7 | е 8 | ж 9 | з 10 | и 11 | й 12 | к 13 | л 14 | м 15 | н 16 | о 17 | п 18 | р 19 | с 20 | т 21 | у 22 | ф 23 | х 24 | ц 25 | ч 26 | ш 27 | щ 28 | ъ 29 | ы 30 | ь 31 | э 32 | ю 33 | я 34 | ё 35 | ң 36 | ү 37 | ө 38 | -------------------------------------------------------------------------------- /latvian/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | r 19 | s 20 | t 21 | u 22 | v 23 | x 24 | z 25 | ā 26 | č 27 | ē 28 | ģ 29 | ī 30 | ķ 31 | ļ 32 | ņ 33 | š 34 | ū 35 | ž 36 | -------------------------------------------------------------------------------- /latvian/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | r 19 | s 20 | t 21 | u 22 | v 23 | x 24 | z 25 | ā 26 | č 27 | ē 28 | ģ 29 | ī 30 | ķ 31 | ļ 32 | ņ 33 | š 34 | ū 35 | ž 36 | -------------------------------------------------------------------------------- /lithuanian/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | ą 29 | č 30 | ė 31 | ę 32 | į 33 | š 34 | ū 35 | ų 36 | ž 37 | -------------------------------------------------------------------------------- /lithuanian/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 
6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | ą 29 | č 30 | ė 31 | ę 32 | į 33 | š 34 | ū 35 | ų 36 | ž 37 | -------------------------------------------------------------------------------- /luganda/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Luganda STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 17 | - Model language: Luganda / Oluganda / `lg` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{luganda-stt, author = {Tyers,Francis}, title = {Luganda STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}}, year = {2021}, month = {April}, number = {STT-CV6.1-LG-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 
25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Luganda Language](https://en.wikipedia.org/wiki/Luganda_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/lg/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|97.7\%|33.2\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. 
For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /luganda/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | a 4 | b 5 | c 6 | d 7 | e 8 | f 9 | g 10 | h 11 | i 12 | j 13 | k 14 | l 15 | m 16 | n 17 | o 18 | p 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | -------------------------------------------------------------------------------- /luganda/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Luganda STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Luganda / Oluganda / `lg` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{luganda-stt, author = {Tyers,Francis}, title = {Luganda STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}}, year = {2021}, month = {April}, number = {STT-CV6.1-LG-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Luganda Language](https://en.wikipedia.org/wiki/Luganda_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/lg/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|93.1\%|30.5\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /luganda/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | a 4 | b 5 | c 6 | d 7 | e 8 | f 9 | g 10 | h 11 | i 12 | j 13 | k 14 | l 15 | m 16 | n 17 | o 18 | p 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | -------------------------------------------------------------------------------- /maltese/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Maltese STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Maltese / Malti / `mt` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{maltese-stt, author = {Tyers,Francis}, title = {Maltese STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-MT-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Maltese Language](https://en.wikipedia.org/wiki/Maltese_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/mt/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|93.6\%|33.7\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /maltese/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | - 4 | a 5 | b 6 | c 7 | d 8 | e 9 | f 10 | g 11 | h 12 | i 13 | j 14 | k 15 | l 16 | m 17 | n 18 | o 19 | p 20 | q 21 | r 22 | s 23 | t 24 | u 25 | v 26 | w 27 | x 28 | y 29 | z 30 | à 31 | è 32 | ì 33 | ò 34 | ù 35 | ċ 36 | ġ 37 | ħ 38 | ż 39 | -------------------------------------------------------------------------------- /maltese/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Maltese STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Maltese / Malti / `mt` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{maltese-stt, author = {Tyers,Francis}, title = {Maltese STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-MT-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Maltese Language](https://en.wikipedia.org/wiki/Maltese_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/mt/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|86.4\%|27.9\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /maltese/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | - 4 | a 5 | b 6 | c 7 | d 8 | e 9 | f 10 | g 11 | h 12 | i 13 | j 14 | k 15 | l 16 | m 17 | n 18 | o 19 | p 20 | q 21 | r 22 | s 23 | t 24 | u 25 | v 26 | w 27 | x 28 | y 29 | z 30 | à 31 | è 32 | ì 33 | ò 34 | ù 35 | ċ 36 | ġ 37 | ħ 38 | ż 39 | -------------------------------------------------------------------------------- /mixtec/jemeyer/v1.0.0/LICENSE: -------------------------------------------------------------------------------- 1 | https://creativecommons.org/licenses/by-nc-sa/3.0/ -------------------------------------------------------------------------------- /mixtec/jemeyer/v1.0.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric index. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' in the Alphabet. 5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | á 33 | ã 34 | é 35 | í 36 | ñ 37 | ó 38 | ú 39 | ü 40 | ʼ 41 | # The last (non-comment) line needs to end with a newline. 
42 | -------------------------------------------------------------------------------- /mongolian/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | а 3 | б 4 | в 5 | г 6 | д 7 | е 8 | ж 9 | з 10 | и 11 | й 12 | к 13 | л 14 | м 15 | н 16 | о 17 | п 18 | р 19 | с 20 | т 21 | у 22 | ф 23 | х 24 | ц 25 | ч 26 | ш 27 | щ 28 | ы 29 | ь 30 | э 31 | ю 32 | я 33 | ё 34 | ү 35 | ө 36 | -------------------------------------------------------------------------------- /mongolian/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | а 3 | б 4 | в 5 | г 6 | д 7 | е 8 | ж 9 | з 10 | и 11 | й 12 | к 13 | л 14 | м 15 | н 16 | о 17 | п 18 | р 19 | с 20 | т 21 | у 22 | ф 23 | х 24 | ц 25 | ч 26 | ш 27 | щ 28 | ы 29 | ь 30 | э 31 | ю 32 | я 33 | ё 34 | ү 35 | ө 36 | -------------------------------------------------------------------------------- /odia/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Odia STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Odia / ଓଡ଼ିଆ / `or` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{odia-stt, author = {Tyers,Francis}, title = {Odia STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-OR-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Odia Language](https://en.wikipedia.org/wiki/Odia_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/or/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|98.9\%|55.2\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /odia/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ଁ 3 | ଂ 4 | ଃ 5 | ଅ 6 | ଆ 7 | ଇ 8 | ଈ 9 | ଉ 10 | ଊ 11 | ଋ 12 | ଌ 13 | ଏ 14 | ଐ 15 | ଓ 16 | ଔ 17 | କ 18 | ଖ 19 | ଗ 20 | ଘ 21 | ଙ 22 | ଚ 23 | ଛ 24 | ଜ 25 | ଝ 26 | ଞ 27 | ଟ 28 | ଠ 29 | ଡ 30 | ଢ 31 | ଣ 32 | ତ 33 | ଥ 34 | ଦ 35 | ଧ 36 | ନ 37 | ପ 38 | ଫ 39 | ବ 40 | ଭ 41 | ମ 42 | ଯ 43 | ର 44 | ଲ 45 | ଳ 46 | ଵ 47 | ଶ 48 | ଷ 49 | ସ 50 | ହ 51 | ଼ 52 | ଽ 53 | ା 54 | ି 55 | ୀ 56 | ୁ 57 | ୂ 58 | ୃ 59 | ୄ 60 | େ 61 | ୈ 62 | ୋ 63 | ୌ 64 | ୍ 65 | ୕ 66 | ୖ 67 | ୗ 68 | ଡ଼ 69 | ଢ଼ 70 | ୟ 71 | ୠ 72 | ୡ 73 | ୢ 74 | ୣ 75 | ୰ 76 | ୱ 77 | ୲ 78 | ୳ 79 | ୴ 80 | ୵ 81 | ୶ 82 | ୷ 83 | -------------------------------------------------------------------------------- /odia/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Odia STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Odia / ଓଡ଼ିଆ / `or` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{odia-stt, author = {Tyers,Francis}, title = {Odia STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-OR-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Odia Language](https://en.wikipedia.org/wiki/Odia_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/or/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|95.0\%|35.0\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /odia/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ଁ 3 | ଂ 4 | ଃ 5 | ଅ 6 | ଆ 7 | ଇ 8 | ଈ 9 | ଉ 10 | ଊ 11 | ଋ 12 | ଌ 13 | ଏ 14 | ଐ 15 | ଓ 16 | ଔ 17 | କ 18 | ଖ 19 | ଗ 20 | ଘ 21 | ଙ 22 | ଚ 23 | ଛ 24 | ଜ 25 | ଝ 26 | ଞ 27 | ଟ 28 | ଠ 29 | ଡ 30 | ଢ 31 | ଣ 32 | ତ 33 | ଥ 34 | ଦ 35 | ଧ 36 | ନ 37 | ପ 38 | ଫ 39 | ବ 40 | ଭ 41 | ମ 42 | ଯ 43 | ର 44 | ଲ 45 | ଳ 46 | ଵ 47 | ଶ 48 | ଷ 49 | ସ 50 | ହ 51 | ଼ 52 | ଽ 53 | ା 54 | ି 55 | ୀ 56 | ୁ 57 | ୂ 58 | ୃ 59 | ୄ 60 | େ 61 | ୈ 62 | ୋ 63 | ୌ 64 | ୍ 65 | ୕ 66 | ୖ 67 | ୗ 68 | ଡ଼ 69 | ଢ଼ 70 | ୟ 71 | ୠ 72 | ୡ 73 | ୢ 74 | ୣ 75 | ୰ 76 | ୱ 77 | ୲ 78 | ୳ 79 | ୴ 80 | ୵ 81 | ୶ 82 | ୷ 83 | -------------------------------------------------------------------------------- /persian/oct4pie/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric index. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' in the Alphabet. 5 | 6 | ' 7 | ، 8 | ؛ 9 | ؟ 10 | ء 11 | آ 12 | أ 13 | ؤ 14 | ئ 15 | ا 16 | ب 17 | ة 18 | ت 19 | ث 20 | ج 21 | ح 22 | خ 23 | د 24 | ذ 25 | ر 26 | ز 27 | س 28 | ش 29 | ص 30 | ض 31 | ط 32 | ظ 33 | ع 34 | غ 35 | ـ 36 | ف 37 | ق 38 | ك 39 | ل 40 | م 41 | ن 42 | ه 43 | و 44 | ى 45 | ي 46 | ً 47 | ٌ 48 | َ 49 | ُ 50 | ِ 51 | ّ 52 | ْ 53 | ٔ 54 | ٬ 55 | پ 56 | چ 57 | ژ 58 | ک 59 | گ 60 | ۀ 61 | ی 62 | ے 63 | – 64 | “ 65 | ” 66 | … 67 | ﮐ 68 | ﮔ 69 | ﯾ 70 | ﯿ 71 | ﺍ 72 | ﺎ 73 | ﺑ 74 | ﺒ 75 | ﺖ 76 | ﺘ 77 | ﺧ 78 | ﺩ 79 | ﺪ 80 | ﺭ 81 | ﺮ 82 | ﺴ 83 | ﺷ 84 | ﺸ 85 | ﻋ 86 | ﻌ 87 | ﻢ 88 | ﻤ 89 | ﻥ 90 | ﻧ 91 | ﻭ 92 | ﻮ 93 | # The last (non-comment) line needs to end with a newline. 
94 | -------------------------------------------------------------------------------- /polish/jaco-assistant/v0.0.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | ć 33 | ń 34 | ó 35 | ś 36 | ź 37 | ż 38 | ą 39 | ę 40 | ł 41 | # The last (non-comment) line needs to end with a newline. 42 | -------------------------------------------------------------------------------- /portuguese/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | - 4 | a 5 | b 6 | c 7 | d 8 | e 9 | f 10 | g 11 | h 12 | i 13 | j 14 | k 15 | l 16 | m 17 | n 18 | o 19 | p 20 | q 21 | r 22 | s 23 | t 24 | u 25 | v 26 | w 27 | x 28 | y 29 | z 30 | à 31 | á 32 | â 33 | ã 34 | ç 35 | é 36 | ê 37 | í 38 | ó 39 | ô 40 | õ 41 | ú 42 | ü 43 | -------------------------------------------------------------------------------- /portuguese/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | - 4 | a 5 | b 6 | c 7 | d 8 | e 9 | f 10 | g 11 | h 12 | i 13 | j 14 | k 15 | l 16 | m 17 | n 18 | o 19 | p 20 | q 21 | r 22 | s 23 | t 24 | u 25 | v 26 | w 27 | x 28 | y 29 | z 30 | à 31 | á 32 | â 33 | ã 34 | ç 35 | é 36 | ê 37 | í 38 | ó 39 | ô 40 | õ 41 | ú 42 | ü 43 | -------------------------------------------------------------------------------- /romanian/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | 
k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | â 29 | î 30 | ă 31 | ș 32 | ț 33 | -------------------------------------------------------------------------------- /romanian/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | â 29 | î 30 | ă 31 | ș 32 | ț 33 | -------------------------------------------------------------------------------- /romansh-sursilvan/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | a 4 | b 5 | c 6 | d 7 | e 8 | f 9 | g 10 | h 11 | i 12 | j 13 | k 14 | l 15 | m 16 | n 17 | o 18 | p 19 | q 20 | r 21 | s 22 | t 23 | u 24 | v 25 | w 26 | x 27 | y 28 | z 29 | à 30 | ä 31 | è 32 | é 33 | î 34 | ò 35 | ö 36 | ü 37 | -------------------------------------------------------------------------------- /romansh-vallader/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | a 4 | b 5 | c 6 | d 7 | e 8 | f 9 | g 10 | h 11 | i 12 | j 13 | k 14 | l 15 | m 16 | n 17 | o 18 | p 19 | q 20 | r 21 | s 22 | t 23 | u 24 | v 25 | w 26 | x 27 | y 28 | z 29 | à 30 | â 31 | ä 32 | ç 33 | è 34 | é 35 | ê 36 | ì 37 | ï 38 | ò 39 | ö 40 | ü 41 | -------------------------------------------------------------------------------- /russian/jemeyer/v0.1.0/LICENSE: -------------------------------------------------------------------------------- 1 | https://creativecommons.org/publicdomain/zero/1.0/legalcode 2 | -------------------------------------------------------------------------------- /russian/jemeyer/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | а 3 | б 4 | в 5 | г 6 | д 7 | е 
8 | ж 9 | з 10 | и 11 | й 12 | к 13 | л 14 | м 15 | н 16 | о 17 | п 18 | р 19 | с 20 | т 21 | у 22 | ф 23 | х 24 | ц 25 | ч 26 | ш 27 | щ 28 | ъ 29 | ы 30 | ь 31 | э 32 | ю 33 | я 34 | ё 35 | -------------------------------------------------------------------------------- /sakha/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Sakha STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 17 | - Model language: Sakha / Саха тыла / `sah` 18 | - Model date: April 9, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{sakha-stt, author = {Tyers,Francis}, title = {Sakha STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-SAH-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 
25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Sakha Language](https://en.wikipedia.org/wiki/Sakha_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/sah/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|96.3\%|37.9\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. 
For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /sakha/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | а 3 | б 4 | в 5 | г 6 | д 7 | е 8 | ж 9 | з 10 | и 11 | й 12 | к 13 | л 14 | м 15 | н 16 | о 17 | п 18 | р 19 | с 20 | т 21 | у 22 | ф 23 | х 24 | ц 25 | ч 26 | ш 27 | щ 28 | ы 29 | э 30 | ю 31 | я 32 | ё 33 | ҕ 34 | ҥ 35 | ү 36 | һ 37 | ө 38 | -------------------------------------------------------------------------------- /sakha/itml/v0.1.1/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Sakha STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Sakha / Саха тыла / `sah` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.1` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{sakha-stt, author = {Tyers,Francis}, title = {Sakha STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-SAH-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Sakha Language](https://en.wikipedia.org/wiki/Sakha_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/sah/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|94.5\%|36.3\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /sakha/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | а 3 | б 4 | в 5 | г 6 | д 7 | е 8 | ж 9 | з 10 | и 11 | й 12 | к 13 | л 14 | м 15 | н 16 | о 17 | п 18 | р 19 | с 20 | т 21 | у 22 | ф 23 | х 24 | ц 25 | ч 26 | ш 27 | щ 28 | ы 29 | э 30 | ю 31 | я 32 | ё 33 | ҕ 34 | ҥ 35 | ү 36 | һ 37 | ө 38 | -------------------------------------------------------------------------------- /slovenian/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | r 19 | s 20 | t 21 | u 22 | v 23 | w 24 | x 25 | y 26 | z 27 | č 28 | š 29 | ž 30 | -------------------------------------------------------------------------------- /slovenian/itml/v0.1.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | r 19 | s 20 | t 21 | u 22 | v 23 | w 24 | x 25 | y 26 | z 27 | č 28 | š 29 | ž 30 | -------------------------------------------------------------------------------- /spanish/jaco-assistant/v0.0.1/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | ' 33 | ñ 34 | # The last (non-comment) line needs to end with a newline. 
35 | -------------------------------------------------------------------------------- /swahili-congo/twb/v0.3.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | -------------------------------------------------------------------------------- /swahili/coqui/v8.0-large-vocab/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Swahili STT v8.0 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Maintained by [Coqui](https://coqui.ai/). 17 | - Model language: Swahili / kiswahili / `sw` 18 | - Model date: March 8, 2022 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v8.0` 21 | - Compatible with 🐸 STT version: `v1.3.0` 22 | - License: Apache 2.0 23 | - Citation details: `@techreport{swahili-stt, author = {Coqui}, title = {Swahili STT v8.0}, institution = {Coqui}, address = {\url{https://coqui.ai/models}} year = {2022}, month = {March}, number = {STT-SW-8.0} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT` issues](https://github.com/coqui-ai/STT/issues), open a new discussion on [`STT` discussions](https://github.com/coqui-ai/STT/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 
25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Swahili Language](https://en.wikipedia.org/wiki/Swahili_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | Using the language model with settings `lm_alpha=0.898202045251655` and `lm_beta=2.2684674938753755` (found via `lm_optimizer.py`): 41 | 42 | - Swahili Common Voice 8.0 Test: WER: 15.8\%, CER: 6.6\% 43 | 44 | #### Model Size 45 | 46 | For STT, you always must deploy an acoustic model, and it is often the case you also will want to deploy an application-specific language model. 47 | 48 | |Model type|Vocabulary|Filename|Size| 49 | ----------------|-----|----------------|-----| 50 | |Acoustic model | open | `model.tflite` | 45M| 51 | |Language model | large | `large-vocabulary.scorer` |321M| 52 | 53 | ### Approaches to uncertainty and variability 54 | 55 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 56 | 57 | ## Training data 58 | 59 | This model was trained on the following corpora: Common Voice 8.0 Swahili. 60 | 61 | ## Evaluation data 62 | 63 | The validation ("dev") sets came from Common Voice 8.0. 64 | 65 | ## Ethical considerations 66 | 67 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 
68 | 69 | ### Demographic Bias 70 | 71 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 72 | 73 | ### Surveillance 74 | 75 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 76 | 77 | ## Caveats and recommendations 78 | 79 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 80 | 81 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 82 | -------------------------------------------------------------------------------- /swahili/coqui/v8.0-large-vocab/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric index. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' in the Alphabet. 5 | 6 | ' 7 | a 8 | b 9 | c 10 | d 11 | e 12 | f 13 | g 14 | h 15 | i 16 | j 17 | k 18 | l 19 | m 20 | n 21 | o 22 | p 23 | q 24 | r 25 | s 26 | t 27 | u 28 | v 29 | w 30 | x 31 | y 32 | z 33 | # The last (non-comment) line needs to end with a newline. 
34 | -------------------------------------------------------------------------------- /tamil/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Tamil STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 17 | - Model language: Tamil / தமிழ் / `ta` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{tamil-stt, author = {Tyers,Francis}, title = {Tamil STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-TA-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Tamil Language](https://en.wikipedia.org/wiki/Tamil_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. 
Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/ta/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|99.9\%|46.6\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 
76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 86 | -------------------------------------------------------------------------------- /tamil/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ஃ 3 | அ 4 | ஆ 5 | இ 6 | ஈ 7 | உ 8 | ஊ 9 | எ 10 | ஏ 11 | ஐ 12 | ஒ 13 | ஓ 14 | ஔ 15 | க 16 | ங 17 | ச 18 | ஜ 19 | ஞ 20 | ட 21 | ண 22 | த 23 | ந 24 | ன 25 | ப 26 | ம 27 | ய 28 | ர 29 | ற 30 | ல 31 | ள 32 | ழ 33 | வ 34 | ஶ 35 | ஷ 36 | ஸ 37 | ஹ 38 | ா 39 | ி 40 | ீ 41 | ு 42 | ூ 43 | ெ 44 | ே 45 | ை 46 | ொ 47 | ோ 48 | ௌ 49 | ் 50 | -------------------------------------------------------------------------------- /tatar/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Tatar STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 
14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 17 | - Model language: Tatar / Татарча / `tt` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{tatar-stt, author = {Tyers,Francis}, title = {Tatar STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-TT-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Tatar Language](https://en.wikipedia.org/wiki/Tatar_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/tt/). 
41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|85.8\%|31.7\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. 
Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 86 | -------------------------------------------------------------------------------- /tatar/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | а 3 | б 4 | в 5 | г 6 | д 7 | е 8 | ж 9 | з 10 | и 11 | й 12 | к 13 | л 14 | м 15 | н 16 | о 17 | п 18 | р 19 | с 20 | т 21 | у 22 | ф 23 | х 24 | ц 25 | ч 26 | ш 27 | щ 28 | ъ 29 | ы 30 | ь 31 | э 32 | ю 33 | я 34 | ё 35 | җ 36 | ң 37 | ү 38 | һ 39 | ә 40 | ө 41 | -------------------------------------------------------------------------------- /thai/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Thai STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 
17 | - Model language: Thai / ภาษาไทย / `th` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{thai-stt, author = {Tyers,Francis}, title = {Thai STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}}, year = {2021}, month = {April}, number = {STT-CV6.1-TH-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Thai Language](https://en.wikipedia.org/wiki/Thai_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/th/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|100\%|36.0\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 
49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /thai/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ก 3 | ข 4 | ค 5 | ฆ 6 | ง 7 | จ 8 | ฉ 9 | ช 10 | ซ 11 | ญ 12 | ฎ 13 | ฏ 14 | ฐ 15 | ฑ 16 | ฒ 17 | ณ 18 | ด 19 | ต 20 | ถ 21 | ท 22 | ธ 23 | น 24 | บ 25 | ป 26 | ผ 27 | ฝ 28 | พ 29 | ฟ 30 | ภ 31 | ม 32 | ย 33 | ร 34 | ฤ 35 | ล 36 | ว 37 | ศ 38 | ษ 39 | ส 40 | ห 41 | ฬ 42 | อ 43 | ฮ 44 | ะ 45 | ั 46 | า 47 | ำ 48 | ิ 49 | ี 50 | ึ 51 | ื 52 | ุ 53 | ู 54 | เ 55 | แ 56 | โ 57 | ใ 58 | ไ 59 | ๅ 60 | ็ 61 | ่ 62 | ้ 63 | ๊ 64 | ๋ 65 | ์ 66 | ํ 67 | ๎ 68 | ◌ 69 | -------------------------------------------------------------------------------- /totonac/bozden/v1.0.0/LICENSE: -------------------------------------------------------------------------------- 1 | https://creativecommons.org/licenses/by-nc-sa/3.0/ 2 | -------------------------------------------------------------------------------- /totonac/bozden/v1.0.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Sierra Totonac STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Bülent Özden](https://twitter.com/bulentozden), a member of [Common Voice Türkçe](https://twitter.com/CVTurkce). 
17 | - Model language: Totonac / Sierra Totonac / `tos` 18 | - Model date: April 12, 2022 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v1.0.0` 21 | - Compatible with 🐸 STT version: `v1.3.0` 22 | - License: CC BY-NC-SA 3.0 23 | - Citation details: `@techreport{totonac-stt, author = {Bülent Özden}, title = {Totonac STT 1.0}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}}, year = {2022}, month = {April}, number = {STT-TOS-1.0} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Sierra Totonac Language](https://en.wikipedia.org/wiki/Sierra_Totonac_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | |Test Corpus|WER|CER| 41 | |-----------|---|---| 42 | |OpenSLR 107|87.5\%|25.8\%| 43 | 44 | #### Model Size 45 | 46 | `model.tflite`: 46M 47 | 48 | ### Approaches to uncertainty and variability 49 | 50 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 51 | 52 | ## Training data 53 | 54 | This model was trained on [Totonac Speech with transcription](http://openslr.org/107/) corpus. 
55 | 56 | ## Evaluation data 57 | 58 | This model was evaluated on [Totonac Speech with transcription](http://openslr.org/107/) corpus. 59 | 60 | ## Ethical considerations 61 | 62 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 63 | 64 | ### Demographic Bias 65 | 66 | You should assume every machine learning model has demographic bias unless proven otherwise. For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 67 | 68 | ### Surveillance 69 | 70 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in may countries. You should not assume consent to record and analyze private speech. 71 | 72 | ## Caveats and recommendations 73 | 74 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 75 | 76 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 77 | -------------------------------------------------------------------------------- /totonac/bozden/v1.0.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric index. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' in the Alphabet. 
5 | 6 | a 7 | b 8 | c 9 | d 10 | e 11 | f 12 | g 13 | h 14 | i 15 | j 16 | k 17 | l 18 | m 19 | n 20 | o 21 | p 22 | q 23 | r 24 | s 25 | t 26 | u 27 | v 28 | w 29 | x 30 | y 31 | z 32 | á 33 | é 34 | í 35 | ñ 36 | ó 37 | ú 38 | ʼ 39 | # The last (non-comment) line needs to end with a newline. 40 | -------------------------------------------------------------------------------- /turkish/itml/v0.1.0/MODEL_CARD.md: -------------------------------------------------------------------------------- 1 | # Model card for Turkish STT 2 | 3 | Jump to section: 4 | 5 | - [Model details](#model-details) 6 | - [Intended use](#intended-use) 7 | - [Performance Factors](#performance-factors) 8 | - [Metrics](#metrics) 9 | - [Training data](#training-data) 10 | - [Evaluation data](#evaluation-data) 11 | - [Ethical considerations](#ethical-considerations) 12 | - [Caveats and recommendations](#caveats-and-recommendations) 13 | 14 | ## Model details 15 | 16 | - Person or organization developing model: Originally trained by [Francis Tyers](https://scholar.google.fr/citations?user=o5HSM6cAAAAJ) and the [Inclusive Technology for Marginalised Languages](https://itml.cl.indiana.edu/) group. 17 | - Model language: Turkish / Türkçe / `tr` 18 | - Model date: April 26, 2021 19 | - Model type: `Speech-to-Text` 20 | - Model version: `v0.1.0` 21 | - Compatible with 🐸 STT version: `v0.9.3` 22 | - License: AGPL 23 | - Citation details: `@techreport{turkish-stt, author = {Tyers,Francis}, title = {Turkish STT 0.1}, institution = {Coqui}, address = {\url{https://github.com/coqui-ai/STT-models}} year = {2021}, month = {April}, number = {STT-CV6.1-TR-0.1} }` 24 | - Where to send questions or comments about the model: You can leave an issue on [`STT-model` issues](https://github.com/coqui-ai/STT-models/issues), open a new discussion on [`STT-model` discussions](https://github.com/coqui-ai/STT-models/discussions), or chat with us on [Gitter](https://gitter.im/coqui-ai/). 
25 | 26 | ## Intended use 27 | 28 | Speech-to-Text for the [Turkish Language](https://en.wikipedia.org/wiki/Turkish_language) on 16kHz, mono-channel audio. 29 | 30 | ## Performance Factors 31 | 32 | Factors relevant to Speech-to-Text performance include but are not limited to speaker demographics, recording quality, and background noise. Read more about STT performance factors [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 33 | 34 | ## Metrics 35 | 36 | STT models are usually evaluated in terms of their transcription accuracy, deployment Real-Time Factor, and model size on disk. 37 | 38 | #### Transcription Accuracy 39 | 40 | The following Word Error Rates and Character Error Rates are reported on [omnilingo](https://tepozcatl.omnilingo.cc/tr/). 41 | 42 | |Test Corpus|WER|CER| 43 | |-----------|---|---| 44 | |Common Voice|89.3\%|30.8\%| 45 | 46 | #### Real-Time Factor 47 | 48 | Real-Time Factor (RTF) is defined as `processing-time / length-of-audio`. The exact real-time factor of an STT model will depend on the hardware setup, so you may experience a different RTF. 49 | 50 | Recorded average RTF on laptop CPU: `` 51 | 52 | #### Model Size 53 | 54 | `model.pbmm`: 181M 55 | `model.tflite`: 46M 56 | 57 | ### Approaches to uncertainty and variability 58 | 59 | Confidence scores and multiple paths from the decoding beam can be used to measure model uncertainty and provide multiple, variable transcripts for any processed audio. 60 | 61 | ## Training data 62 | 63 | This model was trained on Common Voice 6.1 train. 64 | 65 | ## Evaluation data 66 | 67 | The Model was evaluated on Common Voice 6.1 test. 68 | 69 | ## Ethical considerations 70 | 71 | Deploying a Speech-to-Text model into any production setting has ethical implications. You should consider these implications before use. 72 | 73 | ### Demographic Bias 74 | 75 | You should assume every machine learning model has demographic bias unless proven otherwise. 
For STT models, it is often the case that transcription accuracy is better for men than it is for women. If you are using this model in production, you should acknowledge this as a potential issue. 76 | 77 | ### Surveillance 78 | 79 | Speech-to-Text may be mis-used to invade the privacy of others by recording and mining information from private conversations. This kind of individual privacy is protected by law in many countries. You should not assume consent to record and analyze private speech. 80 | 81 | ## Caveats and recommendations 82 | 83 | Machine learning models (like this STT model) perform best on data that is similar to the data on which they were trained. Read about what to expect from an STT model with regard to your data [here](https://stt.readthedocs.io/en/latest/DEPLOYMENT.html#how-will-a-model-perform-on-my-data). 84 | 85 | In most applications, it is recommended that you [train your own language model](https://stt.readthedocs.io/en/latest/LANGUAGE_MODEL.html) to improve transcription accuracy on your speech data. 
86 | -------------------------------------------------------------------------------- /turkish/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | a 4 | b 5 | c 6 | d 7 | e 8 | f 9 | g 10 | h 11 | i 12 | j 13 | k 14 | l 15 | m 16 | n 17 | o 18 | p 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | â 29 | ç 30 | ö 31 | ü 32 | ğ 33 | ı 34 | ş 35 | -------------------------------------------------------------------------------- /ukrainian/robinhad/v0.4/LICENSE: -------------------------------------------------------------------------------- 1 | https://creativecommons.org/licenses/by-nc/4.0/legalcode 2 | -------------------------------------------------------------------------------- /ukrainian/robinhad/v0.4/alphabet.txt: -------------------------------------------------------------------------------- 1 | # Each line in this file represents the Unicode codepoint (UTF-8 encoded) 2 | # associated with a numeric label. 3 | # A line that starts with # is a comment. You can escape it with \# if you wish 4 | # to use '#' as a label. 5 | 6 | ’ 7 | а 8 | б 9 | в 10 | г 11 | ґ 12 | д 13 | е 14 | є 15 | ж 16 | з 17 | и 18 | і 19 | ї 20 | й 21 | к 22 | л 23 | м 24 | н 25 | о 26 | п 27 | р 28 | с 29 | т 30 | у 31 | ф 32 | х 33 | ц 34 | ч 35 | ш 36 | щ 37 | ь 38 | ю 39 | я 40 | # The last (non-comment) line needs to end with a newline. 
41 | -------------------------------------------------------------------------------- /upper-sorbian/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | q 19 | r 20 | s 21 | t 22 | u 23 | v 24 | w 25 | x 26 | y 27 | z 28 | á 29 | ó 30 | ö 31 | ü 32 | ć 33 | č 34 | ě 35 | ł 36 | ń 37 | ň 38 | ř 39 | ś 40 | š 41 | ź 42 | ž 43 | -------------------------------------------------------------------------------- /welsh/techiaith/v21.03/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Coqui GmbH 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /welsh/techiaith/v21.03/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | r 19 | s 20 | t 21 | u 22 | v 23 | w 24 | y 25 | z 26 | á 27 | â 28 | ä 29 | é 30 | ê 31 | ë 32 | î 33 | ï 34 | ô 35 | ö 36 | ô 37 | û 38 | ŵ 39 | ŷ 40 | ' 41 | . 42 | ! 43 | ? 44 | -------------------------------------------------------------------------------- /wolof/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | i 10 | j 11 | k 12 | l 13 | m 14 | n 15 | o 16 | p 17 | q 18 | r 19 | s 20 | t 21 | u 22 | w 23 | x 24 | y 25 | à 26 | é 27 | ë 28 | ñ 29 | ó 30 | ŋ 31 | -------------------------------------------------------------------------------- /yoruba/itml/v0.1.0/alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | ' 3 | a 4 | b 5 | d 6 | e 7 | f 8 | g 9 | h 10 | i 11 | j 12 | k 13 | l 14 | m 15 | n 16 | o 17 | p 18 | r 19 | s 20 | t 21 | u 22 | w 23 | y 24 | ̀ 25 | ́ 26 | ṣ 27 | ẹ 28 | ọ 29 | --------------------------------------------------------------------------------