├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── benchmark ├── benchmark.m4a ├── evaluate_yt_commons.py ├── memory_benchmark.py ├── normalizer.json ├── requirements.benchmark.txt ├── speed_benchmark.py ├── utils.py └── wer_benchmark.py ├── docker ├── Dockerfile ├── infer.py └── jfk.flac ├── faster_whisper ├── __init__.py ├── assets │ ├── __init__.py │ ├── silero_decoder_v5.onnx │ └── silero_encoder_v5.onnx ├── audio.py ├── feature_extractor.py ├── tokenizer.py ├── transcribe.py ├── utils.py ├── vad.py └── version.py ├── requirements.conversion.txt ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── conftest.py ├── data ├── hotwords.mp3 ├── jfk.flac ├── multilingual.mp3 ├── physicsworks.wav └── stereo_diarization.wav ├── test_tokenizer.py ├── test_transcribe.py └── test_utils.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | tags: 8 | - v* 9 | pull_request: 10 | branches: 11 | - master 12 | 13 | jobs: 14 | check-code-format: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Set up Python 3.9 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: 3.9 24 | 25 | - name: Install module 26 | run: | 27 | pip install wheel 28 | pip install -e .[dev] 29 | 30 | - name: Check code format with Black 31 | run: | 32 | black --check . 33 | 34 | - name: Check imports order with isort 35 | run: | 36 | isort --check-only . 37 | 38 | - name: Check code style with Flake8 39 | if: ${{ always() }} 40 | run: | 41 | flake8 . 42 | 43 | 44 | run-tests: 45 | runs-on: ubuntu-latest 46 | 47 | steps: 48 | - uses: actions/checkout@v4 49 | 50 | - name: Set up Python 3.9 51 | uses: actions/setup-python@v5 52 | with: 53 | python-version: 3.9 54 | 55 | - name: Install module 56 | run: | 57 | pip install wheel 58 | pip install -e .[dev] 59 | 60 | - name: Run pytest 61 | run: | 62 | pytest -v tests/ 63 | 64 | 65 | build-and-push-package: 66 | runs-on: ubuntu-latest 67 | needs: [check-code-format, run-tests] 68 | 69 | steps: 70 | - uses: actions/checkout@v4 71 | 72 | - name: Set up Python 3.9 73 | uses: actions/setup-python@v5 74 | with: 75 | python-version: 3.9 76 | 77 | - name: Install dependencies 78 | run: | 79 | pip install wheel 80 | 81 | - name: Build package 82 | run: | 83 | python3 setup.py sdist bdist_wheel 84 | 85 | - name: Push package on PyPI 86 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 87 | uses: pypa/gh-action-pypi-publish@release/v1 88 | with: 89 | user: __token__ 90 | password: ${{ secrets.PYPI_API_TOKEN }} 91 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / Optimized / DLL Files 2 | *.pyc 3 | *.pyo 4 | *.pyd 5 | __pycache__/ 6 | 7 | # Distribution / Packaging 8 | venv/ 9 | 10 | # Unit Test 11 | .pytest_cache/ 12 | 13 | # Ignore IDE, Editor Files 14 | .idea/ 15 | .vscode/ 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to faster-whisper 2 | 3 | Contributions are welcome! Here are some pointers to help you install the library for development and validate your changes before submitting a pull request. 
4 | 5 | ## Install the library for development 6 | 7 | We recommend installing the module in editable mode with the `dev` extra requirements: 8 | 9 | ```bash 10 | git clone https://github.com/SYSTRAN/faster-whisper.git 11 | cd faster-whisper/ 12 | pip install -e .[dev] 13 | ``` 14 | 15 | ## Validate the changes before creating a pull request 16 | 17 | 1. Make sure the existing tests are still passing (and consider adding new tests as well!): 18 | 19 | ```bash 20 | pytest tests/ 21 | ``` 22 | 23 | 2. Reformat and validate the code with the following tools: 24 | 25 | ```bash 26 | black . 27 | isort . 28 | flake8 . 29 | ``` 30 | 31 | These steps are also run automatically in the CI when you open the pull request. 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 SYSTRAN 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include faster_whisper/assets/silero_encoder_v5.onnx 2 | include faster_whisper/assets/silero_decoder_v5.onnx 3 | include requirements.txt 4 | include requirements.conversion.txt 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CI](https://github.com/SYSTRAN/faster-whisper/workflows/CI/badge.svg)](https://github.com/SYSTRAN/faster-whisper/actions?query=workflow%3ACI) [![PyPI version](https://badge.fury.io/py/faster-whisper.svg)](https://badge.fury.io/py/faster-whisper) 2 | 3 | # Faster Whisper transcription with CTranslate2 4 | 5 | **faster-whisper** is a reimplementation of OpenAI's Whisper model using [CTranslate2](https://github.com/OpenNMT/CTranslate2/), which is a fast inference engine for Transformer models. 6 | 7 | This implementation is up to 4 times faster than [openai/whisper](https://github.com/openai/whisper) for the same accuracy while using less memory. The efficiency can be further improved with 8-bit quantization on both CPU and GPU. 
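As a quick illustration of the quantization point above, 8-bit inference is selected through the `compute_type` argument of `WhisperModel`; a minimal sketch using a placeholder `audio.mp3` (the full usage examples follow in the Usage section below):

```python
from faster_whisper import WhisperModel

# INT8 on CPU; on GPU, compute_type="int8_float16" stores 8-bit weights
# while keeping the computation in FP16.
model = WhisperModel("large-v3", device="cpu", compute_type="int8")

segments, info = model.transcribe("audio.mp3", beam_size=5)
for segment in segments:
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
```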
8 | 9 | ## Benchmark 10 | 11 | ### Whisper 12 | 13 | For reference, here are the time and memory usage required to transcribe [**13 minutes**](https://www.youtube.com/watch?v=0u7tTptBo9I) of audio using different implementations: 14 | 15 | * [openai/whisper](https://github.com/openai/whisper)@[v20240930](https://github.com/openai/whisper/tree/v20240930) 16 | * [whisper.cpp](https://github.com/ggerganov/whisper.cpp)@[v1.7.2](https://github.com/ggerganov/whisper.cpp/tree/v1.7.2) 17 | * [transformers](https://github.com/huggingface/transformers)@[v4.46.3](https://github.com/huggingface/transformers/tree/v4.46.3) 18 | * [faster-whisper](https://github.com/SYSTRAN/faster-whisper)@[v1.1.0](https://github.com/SYSTRAN/faster-whisper/tree/v1.1.0) 19 | 20 | ### Large-v2 model on GPU 21 | 22 | | Implementation | Precision | Beam size | Time | VRAM Usage | 23 | | --- | --- | --- | --- | --- | 24 | | openai/whisper | fp16 | 5 | 2m23s | 4708MB | 25 | | whisper.cpp (Flash Attention) | fp16 | 5 | 1m05s | 4127MB | 26 | | transformers (SDPA)[^1] | fp16 | 5 | 1m52s | 4960MB | 27 | | faster-whisper | fp16 | 5 | 1m03s | 4525MB | 28 | | faster-whisper (`batch_size=8`) | fp16 | 5 | 17s | 6090MB | 29 | | faster-whisper | int8 | 5 | 59s | 2926MB | 30 | | faster-whisper (`batch_size=8`) | int8 | 5 | 16s | 4500MB | 31 | 32 | ### distil-whisper-large-v3 model on GPU 33 | 34 | | Implementation | Precision | Beam size | Time | YT Commons WER | 35 | | --- | --- | --- | --- | --- | 36 | | transformers (SDPA) (`batch_size=16`) | fp16 | 5 | 46m12s | 14.801 | 37 | | faster-whisper (`batch_size=16`) | fp16 | 5 | 25m50s | 13.527 | 38 | 39 | *GPU benchmarks were executed with CUDA 12.4 on an NVIDIA RTX 3070 Ti 8GB.* 40 | [^1]: transformers OOM for any batch size > 1 41 | 42 | ### Small model on CPU 43 | 44 | | Implementation | Precision | Beam size | Time | RAM Usage | 45 | | --- | --- | --- | --- | --- | 46 | | openai/whisper | fp32 | 5 | 6m58s | 2335MB | 47 | | whisper.cpp | fp32 | 5 | 2m05s | 1049MB | 48 | | whisper.cpp (OpenVINO) | fp32 | 5 | 1m45s | 1642MB | 49 | | faster-whisper | fp32 | 5 | 2m37s | 2257MB | 50 | | faster-whisper (`batch_size=8`) | fp32 | 5 | 1m06s | 4230MB | 51 | | faster-whisper | int8 | 5 | 1m42s | 1477MB | 52 | | faster-whisper (`batch_size=8`) | int8 | 5 | 51s | 3608MB | 53 | 54 | *Executed with 8 threads on an Intel Core i7-12700K.* 55 | 56 | 57 | ## Requirements 58 | 59 | * Python 3.9 or greater 60 | 61 | Unlike openai-whisper, FFmpeg does **not** need to be installed on the system. The audio is decoded with the Python library [PyAV](https://github.com/PyAV-Org/PyAV), which bundles the FFmpeg libraries in its package. 62 | 63 | ### GPU 64 | 65 | GPU execution requires the following NVIDIA libraries to be installed: 66 | 67 | * [cuBLAS for CUDA 12](https://developer.nvidia.com/cublas) 68 | * [cuDNN 9 for CUDA 12](https://developer.nvidia.com/cudnn) 69 | 70 | **Note**: The latest versions of `ctranslate2` only support CUDA 12 and cuDNN 9. For CUDA 11 and cuDNN 8, the current workaround is downgrading to version `3.24.0` of `ctranslate2`; for CUDA 12 and cuDNN 8, downgrade to version `4.4.0` of `ctranslate2`. This can be done with `pip install --force-reinstall ctranslate2==4.4.0` (or the corresponding version) or by pinning the version in a `requirements.txt`. 71 | 72 | There are multiple ways to install the NVIDIA libraries mentioned above. The recommended way is described in the official NVIDIA documentation, but we also suggest other installation methods below. 73 | 74 |
75 | Other installation methods (click to expand) 76 | 77 | 78 | **Note:** For all of the methods below, keep in mind the note above regarding CUDA versions. Depending on your setup, you may need to install the _CUDA 11_ versions of the libraries that correspond to the CUDA 12 libraries listed in the instructions below. 79 | 80 | #### Use Docker 81 | 82 | The libraries (cuBLAS, cuDNN) are installed in the official NVIDIA CUDA Docker image: `nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04`. 83 | 84 | #### Install with `pip` (Linux only) 85 | 86 | On Linux, these libraries can be installed with `pip`. Note that `LD_LIBRARY_PATH` must be set before launching Python. 87 | 88 | ```bash 89 | pip install nvidia-cublas-cu12 nvidia-cudnn-cu12==9.* 90 | 91 | export LD_LIBRARY_PATH=`python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'` 92 | ``` 93 | 94 | #### Download the libraries from Purfview's repository (Windows & Linux) 95 | 96 | Purfview's [whisper-standalone-win](https://github.com/Purfview/whisper-standalone-win) provides the required NVIDIA libraries for Windows & Linux in a [single archive](https://github.com/Purfview/whisper-standalone-win/releases/tag/libs). Decompress the archive and place the libraries in a directory included in the `PATH`. 97 | 98 |
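If you are stuck on one of the older CUDA/cuDNN combinations covered by the note in the GPU requirements above, the `ctranslate2` downgrade can be pinned explicitly; a minimal sketch using the versions from that note (pick the command matching your setup):

```bash
# CUDA 12 with cuDNN 8: stay on ctranslate2 4.4.0
pip install --force-reinstall ctranslate2==4.4.0

# CUDA 11 with cuDNN 8: downgrade further to ctranslate2 3.24.0
pip install --force-reinstall ctranslate2==3.24.0
```

The same pin can also be recorded in a `requirements.txt` so it survives later reinstalls.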
99 | 100 | ## Installation 101 | 102 | The module can be installed from [PyPI](https://pypi.org/project/faster-whisper/): 103 | 104 | ```bash 105 | pip install faster-whisper 106 | ``` 107 | 108 |
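After installing, a one-liner import is a quick way to confirm the package is usable (an informal check, not part of the project's documented workflow):

```bash
python3 -c "from faster_whisper import WhisperModel; print('faster-whisper is installed')"
```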
109 | Other installation methods (click to expand) 110 | 111 | ### Install the master branch 112 | 113 | ```bash 114 | pip install --force-reinstall "faster-whisper @ https://github.com/SYSTRAN/faster-whisper/archive/refs/heads/master.tar.gz" 115 | ``` 116 | 117 | ### Install a specific commit 118 | 119 | ```bash 120 | pip install --force-reinstall "faster-whisper @ https://github.com/SYSTRAN/faster-whisper/archive/a4f1cc8f11433e454c3934442b5e1a4ed5e865c3.tar.gz" 121 | ``` 122 | 123 |
124 | 125 | ## Usage 126 | 127 | ### Faster-whisper 128 | 129 | ```python 130 | from faster_whisper import WhisperModel 131 | 132 | model_size = "large-v3" 133 | 134 | # Run on GPU with FP16 135 | model = WhisperModel(model_size, device="cuda", compute_type="float16") 136 | 137 | # or run on GPU with INT8 138 | # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16") 139 | # or run on CPU with INT8 140 | # model = WhisperModel(model_size, device="cpu", compute_type="int8") 141 | 142 | segments, info = model.transcribe("audio.mp3", beam_size=5) 143 | 144 | print("Detected language '%s' with probability %f" % (info.language, info.language_probability)) 145 | 146 | for segment in segments: 147 | print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) 148 | ``` 149 | 150 | **Warning:** `segments` is a *generator* so the transcription only starts when you iterate over it. The transcription can be run to completion by gathering the segments in a list or a `for` loop: 151 | 152 | ```python 153 | segments, _ = model.transcribe("audio.mp3") 154 | segments = list(segments) # The transcription will actually run here. 155 | ``` 156 | 157 | ### Batched Transcription 158 | The following code snippet illustrates how to run batched transcription on an example audio file. `BatchedInferencePipeline.transcribe` is a drop-in replacement for `WhisperModel.transcribe` 159 | 160 | ```python 161 | from faster_whisper import WhisperModel, BatchedInferencePipeline 162 | 163 | model = WhisperModel("turbo", device="cuda", compute_type="float16") 164 | batched_model = BatchedInferencePipeline(model=model) 165 | segments, info = batched_model.transcribe("audio.mp3", batch_size=16) 166 | 167 | for segment in segments: 168 | print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) 169 | ``` 170 | 171 | ### Faster Distil-Whisper 172 | 173 | The Distil-Whisper checkpoints are compatible with the Faster-Whisper package. In particular, the latest [distil-large-v3](https://huggingface.co/distil-whisper/distil-large-v3) 174 | checkpoint is intrinsically designed to work with the Faster-Whisper transcription algorithm. The following code snippet 175 | demonstrates how to run inference with distil-large-v3 on a specified audio file: 176 | 177 | ```python 178 | from faster_whisper import WhisperModel 179 | 180 | model_size = "distil-large-v3" 181 | 182 | model = WhisperModel(model_size, device="cuda", compute_type="float16") 183 | segments, info = model.transcribe("audio.mp3", beam_size=5, language="en", condition_on_previous_text=False) 184 | 185 | for segment in segments: 186 | print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) 187 | ``` 188 | 189 | For more information about the distil-large-v3 model, refer to the original [model card](https://huggingface.co/distil-whisper/distil-large-v3). 190 | 191 | ### Word-level timestamps 192 | 193 | ```python 194 | segments, _ = model.transcribe("audio.mp3", word_timestamps=True) 195 | 196 | for segment in segments: 197 | for word in segment.words: 198 | print("[%.2fs -> %.2fs] %s" % (word.start, word.end, word.word)) 199 | ``` 200 | 201 | ### VAD filter 202 | 203 | The library integrates the [Silero VAD](https://github.com/snakers4/silero-vad) model to filter out parts of the audio without speech: 204 | 205 | ```python 206 | segments, _ = model.transcribe("audio.mp3", vad_filter=True) 207 | ``` 208 | 209 | The default behavior is conservative and only removes silence longer than 2 seconds. 
See the available VAD parameters and default values in the [source code](https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/vad.py). They can be customized with the dictionary argument `vad_parameters`: 210 | 211 | ```python 212 | segments, _ = model.transcribe( 213 | "audio.mp3", 214 | vad_filter=True, 215 | vad_parameters=dict(min_silence_duration_ms=500), 216 | ) 217 | ``` 218 | The VAD filter is enabled by default for batched transcription. 219 | 220 | ### Logging 221 | 222 | The library logging level can be configured like this: 223 | 224 | ```python 225 | import logging 226 | 227 | logging.basicConfig() 228 | logging.getLogger("faster_whisper").setLevel(logging.DEBUG) 229 | ``` 230 | 231 | ### Going further 232 | 233 | See more model and transcription options in the [`WhisperModel`](https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/transcribe.py) class implementation. 234 | 235 | ## Community integrations 236 | 237 | Here is a non-exhaustive list of open-source projects using faster-whisper. Feel free to add your project to the list! 238 | 239 | 240 | * [speaches](https://github.com/speaches-ai/speaches) is an OpenAI-compatible server using `faster-whisper`. It is easily deployable with Docker, works with the OpenAI SDKs/CLI, and supports streaming and live transcription. 241 | * [WhisperX](https://github.com/m-bain/whisperX) is an award-winning Python library that offers speaker diarization and accurate word-level timestamps using wav2vec2 alignment. 242 | * [whisper-ctranslate2](https://github.com/Softcatala/whisper-ctranslate2) is a command-line client based on faster-whisper and compatible with the original client from openai/whisper. 243 | * [whisper-diarize](https://github.com/MahmoudAshraf97/whisper-diarization) is a speaker diarization tool based on faster-whisper and NVIDIA NeMo. 244 | * [whisper-standalone-win](https://github.com/Purfview/whisper-standalone-win) provides standalone CLI executables of faster-whisper for Windows, Linux & macOS. 245 | * [asr-sd-pipeline](https://github.com/hedrergudene/asr-sd-pipeline) provides a scalable, modular, end-to-end multi-speaker speech-to-text solution implemented using AzureML pipelines. 246 | * [Open-Lyrics](https://github.com/zh-plus/Open-Lyrics) is a Python library that transcribes voice files using faster-whisper and translates/polishes the resulting text into `.lrc` files in the desired language using OpenAI-GPT. 247 | * [wscribe](https://github.com/geekodour/wscribe) is a flexible transcript generation tool supporting faster-whisper; it can export word-level transcripts, which can then be edited with [wscribe-editor](https://github.com/geekodour/wscribe-editor). 248 | * [aTrain](https://github.com/BANDAS-Center/aTrain) is a graphical user interface implementation of faster-whisper developed at the BANDAS-Center at the University of Graz for transcription and diarization on Windows ([Windows Store App](https://apps.microsoft.com/detail/atrain/9N15Q44SZNS2)) and Linux. 249 | * [Whisper-Streaming](https://github.com/ufal/whisper_streaming) implements a real-time mode for offline Whisper-like speech-to-text models, with faster-whisper as the most recommended backend. It implements a streaming policy with self-adaptive latency based on the actual source complexity, and demonstrates the state of the art. 250 | * [WhisperLive](https://github.com/collabora/WhisperLive) is a nearly-live implementation of OpenAI's Whisper that uses faster-whisper as the backend to transcribe audio in real time.
251 | * [Faster-Whisper-Transcriber](https://github.com/BBC-Esq/ctranslate2-faster-whisper-transcriber) is a simple but reliable voice transcriber that provides a user-friendly interface. 252 | * [Open-dubbing](https://github.com/softcatala/open-dubbing) is an AI dubbing system that uses machine learning models to automatically translate and synchronize audio dialogue into different languages. 253 | 254 | ## Model conversion 255 | 256 | When loading a model by its size, such as `WhisperModel("large-v3")`, the corresponding CTranslate2 model is automatically downloaded from the [Hugging Face Hub](https://huggingface.co/Systran). 257 | 258 | We also provide a script to convert any Whisper model compatible with the Transformers library. These can be the original OpenAI models or user fine-tuned models. 259 | 260 | For example, the command below converts the [original "large-v3" Whisper model](https://huggingface.co/openai/whisper-large-v3) and saves the weights in FP16: 261 | 262 | ```bash 263 | pip install transformers[torch]>=4.23 264 | 265 | ct2-transformers-converter --model openai/whisper-large-v3 --output_dir whisper-large-v3-ct2 266 | --copy_files tokenizer.json preprocessor_config.json --quantization float16 267 | ``` 268 | 269 | * The option `--model` accepts a model name on the Hub or a path to a model directory. 270 | * If the option `--copy_files tokenizer.json` is not used, the tokenizer configuration is automatically downloaded when the model is loaded later. 271 | 272 | Models can also be converted from code. See the [conversion API](https://opennmt.net/CTranslate2/python/ctranslate2.converters.TransformersConverter.html). 273 | 274 | ### Load a converted model 275 | 276 | 1. Directly load the model from a local directory: 277 | ```python 278 | model = faster_whisper.WhisperModel("whisper-large-v3-ct2") 279 | ``` 280 | 281 | 2. [Upload your model to the Hugging Face Hub](https://huggingface.co/docs/transformers/model_sharing#upload-with-the-web-interface) and load it from its name: 282 | ```python 283 | model = faster_whisper.WhisperModel("username/whisper-large-v3-ct2") 284 | ``` 285 | 286 | ## Comparing performance against other implementations 287 | 288 | If you are comparing the performance against other Whisper implementations, you should make sure to run the comparison with similar settings. In particular: 289 | 290 | * Verify that the same transcription options are used, especially the same beam size. For example, in openai/whisper, `model.transcribe` uses a default beam size of 1, but here we use a default beam size of 5. 291 | * Transcription speed is closely affected by the number of words in the transcript, so ensure that other implementations have a similar WER (Word Error Rate) to this one. 292 | * When running on CPU, make sure to set the same number of threads.
Many frameworks will read the environment variable `OMP_NUM_THREADS`, which can be set when running your script: 293 | 294 | ```bash 295 | OMP_NUM_THREADS=4 python3 my_script.py 296 | ``` 297 | -------------------------------------------------------------------------------- /benchmark/benchmark.m4a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/benchmark/benchmark.m4a -------------------------------------------------------------------------------- /benchmark/evaluate_yt_commons.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | from io import BytesIO 6 | 7 | from datasets import load_dataset 8 | from jiwer import wer 9 | from pytubefix import YouTube 10 | from pytubefix.exceptions import VideoUnavailable 11 | from tqdm import tqdm 12 | from transformers.models.whisper.english_normalizer import EnglishTextNormalizer 13 | 14 | from faster_whisper import BatchedInferencePipeline, WhisperModel, decode_audio 15 | 16 | 17 | def url_to_audio(row): 18 | buffer = BytesIO() 19 | yt = YouTube(row["link"]) 20 | try: 21 | video = ( 22 | yt.streams.filter(only_audio=True, mime_type="audio/mp4") 23 | .order_by("bitrate") 24 | .desc() 25 | .last() 26 | ) 27 | video.stream_to_buffer(buffer) 28 | buffer.seek(0) 29 | row["audio"] = decode_audio(buffer) 30 | except VideoUnavailable: 31 | print(f'Failed to download: {row["link"]}') 32 | row["audio"] = [] 33 | return row 34 | 35 | 36 | parser = argparse.ArgumentParser(description="WER benchmark") 37 | parser.add_argument( 38 | "--audio_numb", 39 | type=int, 40 | default=None, 41 | help="Specify the number of validation audio files in the dataset." 
42 | " Set to None to retrieve all audio files.", 43 | ) 44 | args = parser.parse_args() 45 | 46 | with open(os.path.join(os.path.dirname(__file__), "normalizer.json"), "r") as f: 47 | normalizer = EnglishTextNormalizer(json.load(f)) 48 | 49 | dataset = load_dataset("mobiuslabsgmbh/youtube-commons-asr-eval", streaming=True).map( 50 | url_to_audio 51 | ) 52 | model = WhisperModel("large-v3", device="cuda") 53 | pipeline = BatchedInferencePipeline(model, device="cuda") 54 | 55 | 56 | all_transcriptions = [] 57 | all_references = [] 58 | # iterate over the dataset and run inference 59 | for i, row in tqdm(enumerate(dataset["test"]), desc="Evaluating..."): 60 | if not row["audio"]: 61 | continue 62 | result, info = pipeline.transcribe( 63 | row["audio"][0], 64 | batch_size=8, 65 | word_timestamps=False, 66 | without_timestamps=True, 67 | ) 68 | 69 | all_transcriptions.append("".join(segment.text for segment in result)) 70 | all_references.append(row["text"][0]) 71 | if args.audio_numb and i == (args.audio_numb - 1): 72 | break 73 | 74 | # normalize predictions and references 75 | all_transcriptions = [normalizer(transcription) for transcription in all_transcriptions] 76 | all_references = [normalizer(reference) for reference in all_references] 77 | 78 | # compute the WER metric 79 | word_error_rate = 100 * wer(hypothesis=all_transcriptions, reference=all_references) 80 | print("WER: %.3f" % word_error_rate) 81 | -------------------------------------------------------------------------------- /benchmark/memory_benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | from typing import Callable 5 | 6 | import py3nvml.py3nvml as nvml 7 | 8 | from memory_profiler import memory_usage 9 | from utils import MyThread, get_logger, inference 10 | 11 | logger = get_logger("faster-whisper") 12 | parser = argparse.ArgumentParser(description="Memory benchmark") 13 | parser.add_argument( 14 | "--gpu_memory", action="store_true", help="Measure GPU memory usage" 15 | ) 16 | parser.add_argument("--device-index", type=int, default=0, help="GPU device index") 17 | parser.add_argument( 18 | "--interval", 19 | type=float, 20 | default=0.5, 21 | help="Interval at which measurements are collected", 22 | ) 23 | args = parser.parse_args() 24 | device_idx = args.device_index 25 | interval = args.interval 26 | 27 | 28 | def measure_memory(func: Callable[[], None]): 29 | if args.gpu_memory: 30 | logger.info( 31 | "Measuring maximum GPU memory usage on GPU device." 32 | " Make sure to not have additional processes running on the same GPU." 
33 | ) 34 | # init nvml 35 | nvml.nvmlInit() 36 | handle = nvml.nvmlDeviceGetHandleByIndex(device_idx) 37 | gpu_name = nvml.nvmlDeviceGetName(handle) 38 | gpu_memory_limit = nvml.nvmlDeviceGetMemoryInfo(handle).total >> 20 39 | gpu_power_limit = nvml.nvmlDeviceGetPowerManagementLimit(handle) / 1000.0 40 | info = {"gpu_memory_usage": [], "gpu_power_usage": []} 41 | 42 | def _get_gpu_info(): 43 | while True: 44 | info["gpu_memory_usage"].append( 45 | nvml.nvmlDeviceGetMemoryInfo(handle).used >> 20 46 | ) 47 | info["gpu_power_usage"].append( 48 | nvml.nvmlDeviceGetPowerUsage(handle) / 1000 49 | ) 50 | time.sleep(interval) 51 | 52 | if stop: 53 | break 54 | 55 | return info 56 | 57 | stop = False 58 | thread = MyThread(_get_gpu_info, params=()) 59 | thread.start() 60 | func() 61 | stop = True 62 | thread.join() 63 | result = thread.get_result() 64 | 65 | # shutdown nvml 66 | nvml.nvmlShutdown() 67 | max_memory_usage = max(result["gpu_memory_usage"]) 68 | max_power_usage = max(result["gpu_power_usage"]) 69 | print("GPU name: %s" % gpu_name) 70 | print("GPU device index: %s" % device_idx) 71 | print( 72 | "Maximum GPU memory usage: %dMiB / %dMiB (%.2f%%)" 73 | % ( 74 | max_memory_usage, 75 | gpu_memory_limit, 76 | (max_memory_usage / gpu_memory_limit) * 100, 77 | ) 78 | ) 79 | print( 80 | "Maximum GPU power usage: %dW / %dW (%.2f%%)" 81 | % ( 82 | max_power_usage, 83 | gpu_power_limit, 84 | (max_power_usage / gpu_power_limit) * 100, 85 | ) 86 | ) 87 | else: 88 | logger.info("Measuring maximum increase of memory usage.") 89 | max_usage = memory_usage(func, max_usage=True, interval=interval) 90 | print("Maximum increase of RAM memory usage: %d MiB" % max_usage) 91 | 92 | 93 | if __name__ == "__main__": 94 | measure_memory(inference) 95 | -------------------------------------------------------------------------------- /benchmark/normalizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "accessorise": "accessorize", 3 | "accessorised": "accessorized", 4 | "accessorises": "accessorizes", 5 | "accessorising": "accessorizing", 6 | "acclimatisation": "acclimatization", 7 | "acclimatise": "acclimatize", 8 | "acclimatised": "acclimatized", 9 | "acclimatises": "acclimatizes", 10 | "acclimatising": "acclimatizing", 11 | "accoutrements": "accouterments", 12 | "aeon": "eon", 13 | "aeons": "eons", 14 | "aerogramme": "aerogram", 15 | "aerogrammes": "aerograms", 16 | "aeroplane": "airplane", 17 | "aeroplanes": "airplanes", 18 | "aesthete": "esthete", 19 | "aesthetes": "esthetes", 20 | "aesthetic": "esthetic", 21 | "aesthetically": "esthetically", 22 | "aesthetics": "esthetics", 23 | "aetiology": "etiology", 24 | "ageing": "aging", 25 | "aggrandisement": "aggrandizement", 26 | "agonise": "agonize", 27 | "agonised": "agonized", 28 | "agonises": "agonizes", 29 | "agonising": "agonizing", 30 | "agonisingly": "agonizingly", 31 | "almanack": "almanac", 32 | "almanacks": "almanacs", 33 | "aluminium": "aluminum", 34 | "amortisable": "amortizable", 35 | "amortisation": "amortization", 36 | "amortisations": "amortizations", 37 | "amortise": "amortize", 38 | "amortised": "amortized", 39 | "amortises": "amortizes", 40 | "amortising": "amortizing", 41 | "amphitheatre": "amphitheater", 42 | "amphitheatres": "amphitheaters", 43 | "anaemia": "anemia", 44 | "anaemic": "anemic", 45 | "anaesthesia": "anesthesia", 46 | "anaesthetic": "anesthetic", 47 | "anaesthetics": "anesthetics", 48 | "anaesthetise": "anesthetize", 49 | "anaesthetised": "anesthetized", 50 | "anaesthetises": 
"anesthetizes", 51 | "anaesthetising": "anesthetizing", 52 | "anaesthetist": "anesthetist", 53 | "anaesthetists": "anesthetists", 54 | "anaesthetize": "anesthetize", 55 | "anaesthetized": "anesthetized", 56 | "anaesthetizes": "anesthetizes", 57 | "anaesthetizing": "anesthetizing", 58 | "analogue": "analog", 59 | "analogues": "analogs", 60 | "analyse": "analyze", 61 | "analysed": "analyzed", 62 | "analyses": "analyzes", 63 | "analysing": "analyzing", 64 | "anglicise": "anglicize", 65 | "anglicised": "anglicized", 66 | "anglicises": "anglicizes", 67 | "anglicising": "anglicizing", 68 | "annualised": "annualized", 69 | "antagonise": "antagonize", 70 | "antagonised": "antagonized", 71 | "antagonises": "antagonizes", 72 | "antagonising": "antagonizing", 73 | "apologise": "apologize", 74 | "apologised": "apologized", 75 | "apologises": "apologizes", 76 | "apologising": "apologizing", 77 | "appal": "appall", 78 | "appals": "appalls", 79 | "appetiser": "appetizer", 80 | "appetisers": "appetizers", 81 | "appetising": "appetizing", 82 | "appetisingly": "appetizingly", 83 | "arbour": "arbor", 84 | "arbours": "arbors", 85 | "archaeologically": "archeologically", 86 | "archaeologist": "archeologist", 87 | "archaeologists": "archeologists", 88 | "archaeology": "archeology", 89 | "archeological": "archaeological", 90 | "ardour": "ardor", 91 | "armour": "armor", 92 | "armoured": "armored", 93 | "armourer": "armorer", 94 | "armourers": "armorers", 95 | "armouries": "armories", 96 | "armoury": "armory", 97 | "artefact": "artifact", 98 | "artefacts": "artifacts", 99 | "authorise": "authorize", 100 | "authorised": "authorized", 101 | "authorises": "authorizes", 102 | "authorising": "authorizing", 103 | "axe": "ax", 104 | "backpedalled": "backpedaled", 105 | "backpedalling": "backpedaling", 106 | "bannister": "banister", 107 | "bannisters": "banisters", 108 | "baptise": "baptize", 109 | "baptised": "baptized", 110 | "baptises": "baptizes", 111 | "baptising": "baptizing", 112 | "bastardise": "bastardize", 113 | "bastardised": "bastardized", 114 | "bastardises": "bastardizes", 115 | "bastardising": "bastardizing", 116 | "battleax": "battleaxe", 117 | "baulk": "balk", 118 | "baulked": "balked", 119 | "baulking": "balking", 120 | "baulks": "balks", 121 | "bedevilled": "bedeviled", 122 | "bedevilling": "bedeviling", 123 | "behaviour": "behavior", 124 | "behavioural": "behavioral", 125 | "behaviourism": "behaviorism", 126 | "behaviourist": "behaviorist", 127 | "behaviourists": "behaviorists", 128 | "behaviours": "behaviors", 129 | "behove": "behoove", 130 | "behoved": "behooved", 131 | "behoves": "behooves", 132 | "bejewelled": "bejeweled", 133 | "belabour": "belabor", 134 | "belaboured": "belabored", 135 | "belabouring": "belaboring", 136 | "belabours": "belabors", 137 | "bevelled": "beveled", 138 | "bevvies": "bevies", 139 | "bevvy": "bevy", 140 | "biassed": "biased", 141 | "biassing": "biasing", 142 | "bingeing": "binging", 143 | "bougainvillaea": "bougainvillea", 144 | "bougainvillaeas": "bougainvilleas", 145 | "bowdlerise": "bowdlerize", 146 | "bowdlerised": "bowdlerized", 147 | "bowdlerises": "bowdlerizes", 148 | "bowdlerising": "bowdlerizing", 149 | "breathalyse": "breathalyze", 150 | "breathalysed": "breathalyzed", 151 | "breathalyser": "breathalyzer", 152 | "breathalysers": "breathalyzers", 153 | "breathalyses": "breathalyzes", 154 | "breathalysing": "breathalyzing", 155 | "brutalise": "brutalize", 156 | "brutalised": "brutalized", 157 | "brutalises": "brutalizes", 158 | "brutalising": "brutalizing", 159 | 
"busses": "buses", 160 | "bussing": "busing", 161 | "caesarean": "cesarean", 162 | "caesareans": "cesareans", 163 | "calibre": "caliber", 164 | "calibres": "calibers", 165 | "calliper": "caliper", 166 | "callipers": "calipers", 167 | "callisthenics": "calisthenics", 168 | "canalise": "canalize", 169 | "canalised": "canalized", 170 | "canalises": "canalizes", 171 | "canalising": "canalizing", 172 | "cancelation": "cancellation", 173 | "cancelations": "cancellations", 174 | "cancelled": "canceled", 175 | "cancelling": "canceling", 176 | "candour": "candor", 177 | "cannibalise": "cannibalize", 178 | "cannibalised": "cannibalized", 179 | "cannibalises": "cannibalizes", 180 | "cannibalising": "cannibalizing", 181 | "canonise": "canonize", 182 | "canonised": "canonized", 183 | "canonises": "canonizes", 184 | "canonising": "canonizing", 185 | "capitalise": "capitalize", 186 | "capitalised": "capitalized", 187 | "capitalises": "capitalizes", 188 | "capitalising": "capitalizing", 189 | "caramelise": "caramelize", 190 | "caramelised": "caramelized", 191 | "caramelises": "caramelizes", 192 | "caramelising": "caramelizing", 193 | "carbonise": "carbonize", 194 | "carbonised": "carbonized", 195 | "carbonises": "carbonizes", 196 | "carbonising": "carbonizing", 197 | "carolled": "caroled", 198 | "carolling": "caroling", 199 | "catalogue": "catalog", 200 | "catalogued": "cataloged", 201 | "catalogues": "catalogs", 202 | "cataloguing": "cataloging", 203 | "catalyse": "catalyze", 204 | "catalysed": "catalyzed", 205 | "catalyses": "catalyzes", 206 | "catalysing": "catalyzing", 207 | "categorise": "categorize", 208 | "categorised": "categorized", 209 | "categorises": "categorizes", 210 | "categorising": "categorizing", 211 | "cauterise": "cauterize", 212 | "cauterised": "cauterized", 213 | "cauterises": "cauterizes", 214 | "cauterising": "cauterizing", 215 | "cavilled": "caviled", 216 | "cavilling": "caviling", 217 | "centigramme": "centigram", 218 | "centigrammes": "centigrams", 219 | "centilitre": "centiliter", 220 | "centilitres": "centiliters", 221 | "centimetre": "centimeter", 222 | "centimetres": "centimeters", 223 | "centralise": "centralize", 224 | "centralised": "centralized", 225 | "centralises": "centralizes", 226 | "centralising": "centralizing", 227 | "centre": "center", 228 | "centred": "centered", 229 | "centrefold": "centerfold", 230 | "centrefolds": "centerfolds", 231 | "centrepiece": "centerpiece", 232 | "centrepieces": "centerpieces", 233 | "centres": "centers", 234 | "channelled": "channeled", 235 | "channelling": "channeling", 236 | "characterise": "characterize", 237 | "characterised": "characterized", 238 | "characterises": "characterizes", 239 | "characterising": "characterizing", 240 | "cheque": "check", 241 | "chequebook": "checkbook", 242 | "chequebooks": "checkbooks", 243 | "chequered": "checkered", 244 | "cheques": "checks", 245 | "chilli": "chili", 246 | "chimaera": "chimera", 247 | "chimaeras": "chimeras", 248 | "chiselled": "chiseled", 249 | "chiselling": "chiseling", 250 | "circularise": "circularize", 251 | "circularised": "circularized", 252 | "circularises": "circularizes", 253 | "circularising": "circularizing", 254 | "civilise": "civilize", 255 | "civilised": "civilized", 256 | "civilises": "civilizes", 257 | "civilising": "civilizing", 258 | "clamour": "clamor", 259 | "clamoured": "clamored", 260 | "clamouring": "clamoring", 261 | "clamours": "clamors", 262 | "clangour": "clangor", 263 | "clarinettist": "clarinetist", 264 | "clarinettists": "clarinetists", 265 | 
"collectivise": "collectivize", 266 | "collectivised": "collectivized", 267 | "collectivises": "collectivizes", 268 | "collectivising": "collectivizing", 269 | "colonisation": "colonization", 270 | "colonise": "colonize", 271 | "colonised": "colonized", 272 | "coloniser": "colonizer", 273 | "colonisers": "colonizers", 274 | "colonises": "colonizes", 275 | "colonising": "colonizing", 276 | "colour": "color", 277 | "colourant": "colorant", 278 | "colourants": "colorants", 279 | "coloured": "colored", 280 | "coloureds": "coloreds", 281 | "colourful": "colorful", 282 | "colourfully": "colorfully", 283 | "colouring": "coloring", 284 | "colourize": "colorize", 285 | "colourized": "colorized", 286 | "colourizes": "colorizes", 287 | "colourizing": "colorizing", 288 | "colourless": "colorless", 289 | "colours": "colors", 290 | "commercialise": "commercialize", 291 | "commercialised": "commercialized", 292 | "commercialises": "commercializes", 293 | "commercialising": "commercializing", 294 | "compartmentalise": "compartmentalize", 295 | "compartmentalised": "compartmentalized", 296 | "compartmentalises": "compartmentalizes", 297 | "compartmentalising": "compartmentalizing", 298 | "computerise": "computerize", 299 | "computerised": "computerized", 300 | "computerises": "computerizes", 301 | "computerising": "computerizing", 302 | "conceptualise": "conceptualize", 303 | "conceptualised": "conceptualized", 304 | "conceptualises": "conceptualizes", 305 | "conceptualising": "conceptualizing", 306 | "connexion": "connection", 307 | "connexions": "connections", 308 | "contextualise": "contextualize", 309 | "contextualised": "contextualized", 310 | "contextualises": "contextualizes", 311 | "contextualising": "contextualizing", 312 | "cosier": "cozier", 313 | "cosies": "cozies", 314 | "cosiest": "coziest", 315 | "cosily": "cozily", 316 | "cosiness": "coziness", 317 | "cosy": "cozy", 318 | "councillor": "councilor", 319 | "councillors": "councilors", 320 | "counselled": "counseled", 321 | "counselling": "counseling", 322 | "counsellor": "counselor", 323 | "counsellors": "counselors", 324 | "crenelated": "crenellated", 325 | "criminalise": "criminalize", 326 | "criminalised": "criminalized", 327 | "criminalises": "criminalizes", 328 | "criminalising": "criminalizing", 329 | "criticise": "criticize", 330 | "criticised": "criticized", 331 | "criticises": "criticizes", 332 | "criticising": "criticizing", 333 | "crueller": "crueler", 334 | "cruellest": "cruelest", 335 | "crystallisation": "crystallization", 336 | "crystallise": "crystallize", 337 | "crystallised": "crystallized", 338 | "crystallises": "crystallizes", 339 | "crystallising": "crystallizing", 340 | "cudgelled": "cudgeled", 341 | "cudgelling": "cudgeling", 342 | "customise": "customize", 343 | "customised": "customized", 344 | "customises": "customizes", 345 | "customising": "customizing", 346 | "cypher": "cipher", 347 | "cyphers": "ciphers", 348 | "decentralisation": "decentralization", 349 | "decentralise": "decentralize", 350 | "decentralised": "decentralized", 351 | "decentralises": "decentralizes", 352 | "decentralising": "decentralizing", 353 | "decriminalisation": "decriminalization", 354 | "decriminalise": "decriminalize", 355 | "decriminalised": "decriminalized", 356 | "decriminalises": "decriminalizes", 357 | "decriminalising": "decriminalizing", 358 | "defence": "defense", 359 | "defenceless": "defenseless", 360 | "defences": "defenses", 361 | "dehumanisation": "dehumanization", 362 | "dehumanise": "dehumanize", 363 | "dehumanised": 
"dehumanized", 364 | "dehumanises": "dehumanizes", 365 | "dehumanising": "dehumanizing", 366 | "demeanour": "demeanor", 367 | "demilitarisation": "demilitarization", 368 | "demilitarise": "demilitarize", 369 | "demilitarised": "demilitarized", 370 | "demilitarises": "demilitarizes", 371 | "demilitarising": "demilitarizing", 372 | "demobilisation": "demobilization", 373 | "demobilise": "demobilize", 374 | "demobilised": "demobilized", 375 | "demobilises": "demobilizes", 376 | "demobilising": "demobilizing", 377 | "democratisation": "democratization", 378 | "democratise": "democratize", 379 | "democratised": "democratized", 380 | "democratises": "democratizes", 381 | "democratising": "democratizing", 382 | "demonise": "demonize", 383 | "demonised": "demonized", 384 | "demonises": "demonizes", 385 | "demonising": "demonizing", 386 | "demoralisation": "demoralization", 387 | "demoralise": "demoralize", 388 | "demoralised": "demoralized", 389 | "demoralises": "demoralizes", 390 | "demoralising": "demoralizing", 391 | "denationalisation": "denationalization", 392 | "denationalise": "denationalize", 393 | "denationalised": "denationalized", 394 | "denationalises": "denationalizes", 395 | "denationalising": "denationalizing", 396 | "deodorise": "deodorize", 397 | "deodorised": "deodorized", 398 | "deodorises": "deodorizes", 399 | "deodorising": "deodorizing", 400 | "depersonalise": "depersonalize", 401 | "depersonalised": "depersonalized", 402 | "depersonalises": "depersonalizes", 403 | "depersonalising": "depersonalizing", 404 | "deputise": "deputize", 405 | "deputised": "deputized", 406 | "deputises": "deputizes", 407 | "deputising": "deputizing", 408 | "desensitisation": "desensitization", 409 | "desensitise": "desensitize", 410 | "desensitised": "desensitized", 411 | "desensitises": "desensitizes", 412 | "desensitising": "desensitizing", 413 | "destabilisation": "destabilization", 414 | "destabilise": "destabilize", 415 | "destabilised": "destabilized", 416 | "destabilises": "destabilizes", 417 | "destabilising": "destabilizing", 418 | "dialled": "dialed", 419 | "dialling": "dialing", 420 | "dialogue": "dialog", 421 | "dialogues": "dialogs", 422 | "diarrhoea": "diarrhea", 423 | "digitise": "digitize", 424 | "digitised": "digitized", 425 | "digitises": "digitizes", 426 | "digitising": "digitizing", 427 | "disc": "disk", 428 | "discolour": "discolor", 429 | "discoloured": "discolored", 430 | "discolouring": "discoloring", 431 | "discolours": "discolors", 432 | "discs": "disks", 433 | "disembowelled": "disemboweled", 434 | "disembowelling": "disemboweling", 435 | "disfavour": "disfavor", 436 | "dishevelled": "disheveled", 437 | "dishonour": "dishonor", 438 | "dishonourable": "dishonorable", 439 | "dishonourably": "dishonorably", 440 | "dishonoured": "dishonored", 441 | "dishonouring": "dishonoring", 442 | "dishonours": "dishonors", 443 | "disorganisation": "disorganization", 444 | "disorganised": "disorganized", 445 | "distil": "distill", 446 | "distils": "distills", 447 | "dramatisation": "dramatization", 448 | "dramatisations": "dramatizations", 449 | "dramatise": "dramatize", 450 | "dramatised": "dramatized", 451 | "dramatises": "dramatizes", 452 | "dramatising": "dramatizing", 453 | "draught": "draft", 454 | "draughtboard": "draftboard", 455 | "draughtboards": "draftboards", 456 | "draughtier": "draftier", 457 | "draughtiest": "draftiest", 458 | "draughts": "drafts", 459 | "draughtsman": "draftsman", 460 | "draughtsmanship": "draftsmanship", 461 | "draughtsmen": "draftsmen", 462 | 
"draughtswoman": "draftswoman", 463 | "draughtswomen": "draftswomen", 464 | "draughty": "drafty", 465 | "drivelled": "driveled", 466 | "drivelling": "driveling", 467 | "duelled": "dueled", 468 | "duelling": "dueling", 469 | "economise": "economize", 470 | "economised": "economized", 471 | "economises": "economizes", 472 | "economising": "economizing", 473 | "editorialise": "editorialize", 474 | "editorialised": "editorialized", 475 | "editorialises": "editorializes", 476 | "editorialising": "editorializing", 477 | "edoema": "edema", 478 | "empathise": "empathize", 479 | "empathised": "empathized", 480 | "empathises": "empathizes", 481 | "empathising": "empathizing", 482 | "emphasise": "emphasize", 483 | "emphasised": "emphasized", 484 | "emphasises": "emphasizes", 485 | "emphasising": "emphasizing", 486 | "enamelled": "enameled", 487 | "enamelling": "enameling", 488 | "enamoured": "enamored", 489 | "encyclopaedia": "encyclopedia", 490 | "encyclopaedias": "encyclopedias", 491 | "encyclopaedic": "encyclopedic", 492 | "endeavour": "endeavor", 493 | "endeavoured": "endeavored", 494 | "endeavouring": "endeavoring", 495 | "endeavours": "endeavors", 496 | "energise": "energize", 497 | "energised": "energized", 498 | "energises": "energizes", 499 | "energising": "energizing", 500 | "enrol": "enroll", 501 | "enrols": "enrolls", 502 | "enthral": "enthrall", 503 | "enthrals": "enthralls", 504 | "epaulette": "epaulet", 505 | "epaulettes": "epaulets", 506 | "epicentre": "epicenter", 507 | "epicentres": "epicenters", 508 | "epilogue": "epilog", 509 | "epilogues": "epilogs", 510 | "epitomise": "epitomize", 511 | "epitomised": "epitomized", 512 | "epitomises": "epitomizes", 513 | "epitomising": "epitomizing", 514 | "equalisation": "equalization", 515 | "equalise": "equalize", 516 | "equalised": "equalized", 517 | "equaliser": "equalizer", 518 | "equalisers": "equalizers", 519 | "equalises": "equalizes", 520 | "equalising": "equalizing", 521 | "eulogise": "eulogize", 522 | "eulogised": "eulogized", 523 | "eulogises": "eulogizes", 524 | "eulogising": "eulogizing", 525 | "evangelise": "evangelize", 526 | "evangelised": "evangelized", 527 | "evangelises": "evangelizes", 528 | "evangelising": "evangelizing", 529 | "exorcise": "exorcize", 530 | "exorcised": "exorcized", 531 | "exorcises": "exorcizes", 532 | "exorcising": "exorcizing", 533 | "extemporisation": "extemporization", 534 | "extemporise": "extemporize", 535 | "extemporised": "extemporized", 536 | "extemporises": "extemporizes", 537 | "extemporising": "extemporizing", 538 | "externalisation": "externalization", 539 | "externalisations": "externalizations", 540 | "externalise": "externalize", 541 | "externalised": "externalized", 542 | "externalises": "externalizes", 543 | "externalising": "externalizing", 544 | "factorise": "factorize", 545 | "factorised": "factorized", 546 | "factorises": "factorizes", 547 | "factorising": "factorizing", 548 | "faecal": "fecal", 549 | "faeces": "feces", 550 | "familiarisation": "familiarization", 551 | "familiarise": "familiarize", 552 | "familiarised": "familiarized", 553 | "familiarises": "familiarizes", 554 | "familiarising": "familiarizing", 555 | "fantasise": "fantasize", 556 | "fantasised": "fantasized", 557 | "fantasises": "fantasizes", 558 | "fantasising": "fantasizing", 559 | "favour": "favor", 560 | "favourable": "favorable", 561 | "favourably": "favorably", 562 | "favoured": "favored", 563 | "favouring": "favoring", 564 | "favourite": "favorite", 565 | "favourites": "favorites", 566 | "favouritism": 
"favoritism", 567 | "favours": "favors", 568 | "feminise": "feminize", 569 | "feminised": "feminized", 570 | "feminises": "feminizes", 571 | "feminising": "feminizing", 572 | "fertilisation": "fertilization", 573 | "fertilise": "fertilize", 574 | "fertilised": "fertilized", 575 | "fertiliser": "fertilizer", 576 | "fertilisers": "fertilizers", 577 | "fertilises": "fertilizes", 578 | "fertilising": "fertilizing", 579 | "fervour": "fervor", 580 | "fibre": "fiber", 581 | "fibreglass": "fiberglass", 582 | "fibres": "fibers", 583 | "fictionalisation": "fictionalization", 584 | "fictionalisations": "fictionalizations", 585 | "fictionalise": "fictionalize", 586 | "fictionalised": "fictionalized", 587 | "fictionalises": "fictionalizes", 588 | "fictionalising": "fictionalizing", 589 | "fillet": "filet", 590 | "filleted": "fileted", 591 | "filleting": "fileting", 592 | "fillets": "filets", 593 | "finalisation": "finalization", 594 | "finalise": "finalize", 595 | "finalised": "finalized", 596 | "finalises": "finalizes", 597 | "finalising": "finalizing", 598 | "flautist": "flutist", 599 | "flautists": "flutists", 600 | "flavour": "flavor", 601 | "flavoured": "flavored", 602 | "flavouring": "flavoring", 603 | "flavourings": "flavorings", 604 | "flavourless": "flavorless", 605 | "flavours": "flavors", 606 | "flavoursome": "flavorsome", 607 | "flyer / flier": "flier / flyer", 608 | "foetal": "fetal", 609 | "foetid": "fetid", 610 | "foetus": "fetus", 611 | "foetuses": "fetuses", 612 | "formalisation": "formalization", 613 | "formalise": "formalize", 614 | "formalised": "formalized", 615 | "formalises": "formalizes", 616 | "formalising": "formalizing", 617 | "fossilisation": "fossilization", 618 | "fossilise": "fossilize", 619 | "fossilised": "fossilized", 620 | "fossilises": "fossilizes", 621 | "fossilising": "fossilizing", 622 | "fraternisation": "fraternization", 623 | "fraternise": "fraternize", 624 | "fraternised": "fraternized", 625 | "fraternises": "fraternizes", 626 | "fraternising": "fraternizing", 627 | "fulfil": "fulfill", 628 | "fulfilment": "fulfillment", 629 | "fulfils": "fulfills", 630 | "funnelled": "funneled", 631 | "funnelling": "funneling", 632 | "gage": "gauge", 633 | "gaged": "gauged", 634 | "gages": "gauges", 635 | "gaging": "gauging", 636 | "galvanise": "galvanize", 637 | "galvanised": "galvanized", 638 | "galvanises": "galvanizes", 639 | "galvanising": "galvanizing", 640 | "gambolled": "gamboled", 641 | "gambolling": "gamboling", 642 | "gaol": "jail", 643 | "gaolbird": "jailbird", 644 | "gaolbirds": "jailbirds", 645 | "gaolbreak": "jailbreak", 646 | "gaolbreaks": "jailbreaks", 647 | "gaoled": "jailed", 648 | "gaoler": "jailer", 649 | "gaolers": "jailers", 650 | "gaoling": "jailing", 651 | "gaols": "jails", 652 | "gasses": "gases", 653 | "generalisation": "generalization", 654 | "generalisations": "generalizations", 655 | "generalise": "generalize", 656 | "generalised": "generalized", 657 | "generalises": "generalizes", 658 | "generalising": "generalizing", 659 | "ghettoise": "ghettoize", 660 | "ghettoised": "ghettoized", 661 | "ghettoises": "ghettoizes", 662 | "ghettoising": "ghettoizing", 663 | "gipsies": "gypsies", 664 | "glamor": "glamour", 665 | "glamorise": "glamorize", 666 | "glamorised": "glamorized", 667 | "glamorises": "glamorizes", 668 | "glamorising": "glamorizing", 669 | "globalisation": "globalization", 670 | "globalise": "globalize", 671 | "globalised": "globalized", 672 | "globalises": "globalizes", 673 | "globalising": "globalizing", 674 | "glueing": "gluing", 675 | 
"goitre": "goiter", 676 | "goitres": "goiters", 677 | "gonorrhoea": "gonorrhea", 678 | "gramme": "gram", 679 | "grammes": "grams", 680 | "gravelled": "graveled", 681 | "grey": "gray", 682 | "greyed": "grayed", 683 | "greying": "graying", 684 | "greyish": "grayish", 685 | "greyness": "grayness", 686 | "greys": "grays", 687 | "grovelled": "groveled", 688 | "grovelling": "groveling", 689 | "groyne": "groin", 690 | "groynes": "groins", 691 | "gruelling": "grueling", 692 | "gruellingly": "gruelingly", 693 | "gryphon": "griffin", 694 | "gryphons": "griffins", 695 | "gynaecological": "gynecological", 696 | "gynaecologist": "gynecologist", 697 | "gynaecologists": "gynecologists", 698 | "gynaecology": "gynecology", 699 | "haematological": "hematological", 700 | "haematologist": "hematologist", 701 | "haematologists": "hematologists", 702 | "haematology": "hematology", 703 | "haemoglobin": "hemoglobin", 704 | "haemophilia": "hemophilia", 705 | "haemophiliac": "hemophiliac", 706 | "haemophiliacs": "hemophiliacs", 707 | "haemorrhage": "hemorrhage", 708 | "haemorrhaged": "hemorrhaged", 709 | "haemorrhages": "hemorrhages", 710 | "haemorrhaging": "hemorrhaging", 711 | "haemorrhoids": "hemorrhoids", 712 | "harbour": "harbor", 713 | "harboured": "harbored", 714 | "harbouring": "harboring", 715 | "harbours": "harbors", 716 | "harmonisation": "harmonization", 717 | "harmonise": "harmonize", 718 | "harmonised": "harmonized", 719 | "harmonises": "harmonizes", 720 | "harmonising": "harmonizing", 721 | "homoeopath": "homeopath", 722 | "homoeopathic": "homeopathic", 723 | "homoeopaths": "homeopaths", 724 | "homoeopathy": "homeopathy", 725 | "homogenise": "homogenize", 726 | "homogenised": "homogenized", 727 | "homogenises": "homogenizes", 728 | "homogenising": "homogenizing", 729 | "honour": "honor", 730 | "honourable": "honorable", 731 | "honourably": "honorably", 732 | "honoured": "honored", 733 | "honouring": "honoring", 734 | "honours": "honors", 735 | "hospitalisation": "hospitalization", 736 | "hospitalise": "hospitalize", 737 | "hospitalised": "hospitalized", 738 | "hospitalises": "hospitalizes", 739 | "hospitalising": "hospitalizing", 740 | "humanise": "humanize", 741 | "humanised": "humanized", 742 | "humanises": "humanizes", 743 | "humanising": "humanizing", 744 | "humour": "humor", 745 | "humoured": "humored", 746 | "humouring": "humoring", 747 | "humourless": "humorless", 748 | "humours": "humors", 749 | "hybridise": "hybridize", 750 | "hybridised": "hybridized", 751 | "hybridises": "hybridizes", 752 | "hybridising": "hybridizing", 753 | "hypnotise": "hypnotize", 754 | "hypnotised": "hypnotized", 755 | "hypnotises": "hypnotizes", 756 | "hypnotising": "hypnotizing", 757 | "hypothesise": "hypothesize", 758 | "hypothesised": "hypothesized", 759 | "hypothesises": "hypothesizes", 760 | "hypothesising": "hypothesizing", 761 | "idealisation": "idealization", 762 | "idealise": "idealize", 763 | "idealised": "idealized", 764 | "idealises": "idealizes", 765 | "idealising": "idealizing", 766 | "idolise": "idolize", 767 | "idolised": "idolized", 768 | "idolises": "idolizes", 769 | "idolising": "idolizing", 770 | "immobilisation": "immobilization", 771 | "immobilise": "immobilize", 772 | "immobilised": "immobilized", 773 | "immobiliser": "immobilizer", 774 | "immobilisers": "immobilizers", 775 | "immobilises": "immobilizes", 776 | "immobilising": "immobilizing", 777 | "immortalise": "immortalize", 778 | "immortalised": "immortalized", 779 | "immortalises": "immortalizes", 780 | "immortalising": "immortalizing", 
781 | "immunisation": "immunization", 782 | "immunise": "immunize", 783 | "immunised": "immunized", 784 | "immunises": "immunizes", 785 | "immunising": "immunizing", 786 | "impanelled": "impaneled", 787 | "impanelling": "impaneling", 788 | "imperilled": "imperiled", 789 | "imperilling": "imperiling", 790 | "individualise": "individualize", 791 | "individualised": "individualized", 792 | "individualises": "individualizes", 793 | "individualising": "individualizing", 794 | "industrialise": "industrialize", 795 | "industrialised": "industrialized", 796 | "industrialises": "industrializes", 797 | "industrialising": "industrializing", 798 | "inflexion": "inflection", 799 | "inflexions": "inflections", 800 | "initialise": "initialize", 801 | "initialised": "initialized", 802 | "initialises": "initializes", 803 | "initialising": "initializing", 804 | "initialled": "initialed", 805 | "initialling": "initialing", 806 | "instal": "install", 807 | "instalment": "installment", 808 | "instalments": "installments", 809 | "instals": "installs", 810 | "instil": "instill", 811 | "instils": "instills", 812 | "institutionalisation": "institutionalization", 813 | "institutionalise": "institutionalize", 814 | "institutionalised": "institutionalized", 815 | "institutionalises": "institutionalizes", 816 | "institutionalising": "institutionalizing", 817 | "intellectualise": "intellectualize", 818 | "intellectualised": "intellectualized", 819 | "intellectualises": "intellectualizes", 820 | "intellectualising": "intellectualizing", 821 | "internalisation": "internalization", 822 | "internalise": "internalize", 823 | "internalised": "internalized", 824 | "internalises": "internalizes", 825 | "internalising": "internalizing", 826 | "internationalisation": "internationalization", 827 | "internationalise": "internationalize", 828 | "internationalised": "internationalized", 829 | "internationalises": "internationalizes", 830 | "internationalising": "internationalizing", 831 | "ionisation": "ionization", 832 | "ionise": "ionize", 833 | "ionised": "ionized", 834 | "ioniser": "ionizer", 835 | "ionisers": "ionizers", 836 | "ionises": "ionizes", 837 | "ionising": "ionizing", 838 | "italicise": "italicize", 839 | "italicised": "italicized", 840 | "italicises": "italicizes", 841 | "italicising": "italicizing", 842 | "itemise": "itemize", 843 | "itemised": "itemized", 844 | "itemises": "itemizes", 845 | "itemising": "itemizing", 846 | "jeopardise": "jeopardize", 847 | "jeopardised": "jeopardized", 848 | "jeopardises": "jeopardizes", 849 | "jeopardising": "jeopardizing", 850 | "jewelled": "jeweled", 851 | "jeweller": "jeweler", 852 | "jewellers": "jewelers", 853 | "jewellery": "jewelry", 854 | "judgement": "judgment", 855 | "kilogramme": "kilogram", 856 | "kilogrammes": "kilograms", 857 | "kilometre": "kilometer", 858 | "kilometres": "kilometers", 859 | "labelled": "labeled", 860 | "labelling": "labeling", 861 | "labour": "labor", 862 | "laboured": "labored", 863 | "labourer": "laborer", 864 | "labourers": "laborers", 865 | "labouring": "laboring", 866 | "labours": "labors", 867 | "lacklustre": "lackluster", 868 | "legalisation": "legalization", 869 | "legalise": "legalize", 870 | "legalised": "legalized", 871 | "legalises": "legalizes", 872 | "legalising": "legalizing", 873 | "legitimise": "legitimize", 874 | "legitimised": "legitimized", 875 | "legitimises": "legitimizes", 876 | "legitimising": "legitimizing", 877 | "leukaemia": "leukemia", 878 | "levelled": "leveled", 879 | "leveller": "leveler", 880 | "levellers": 
"levelers", 881 | "levelling": "leveling", 882 | "libelled": "libeled", 883 | "libelling": "libeling", 884 | "libellous": "libelous", 885 | "liberalisation": "liberalization", 886 | "liberalise": "liberalize", 887 | "liberalised": "liberalized", 888 | "liberalises": "liberalizes", 889 | "liberalising": "liberalizing", 890 | "licence": "license", 891 | "licenced": "licensed", 892 | "licences": "licenses", 893 | "licencing": "licensing", 894 | "likeable": "likable", 895 | "lionisation": "lionization", 896 | "lionise": "lionize", 897 | "lionised": "lionized", 898 | "lionises": "lionizes", 899 | "lionising": "lionizing", 900 | "liquidise": "liquidize", 901 | "liquidised": "liquidized", 902 | "liquidiser": "liquidizer", 903 | "liquidisers": "liquidizers", 904 | "liquidises": "liquidizes", 905 | "liquidising": "liquidizing", 906 | "litre": "liter", 907 | "litres": "liters", 908 | "localise": "localize", 909 | "localised": "localized", 910 | "localises": "localizes", 911 | "localising": "localizing", 912 | "louvre": "louver", 913 | "louvred": "louvered", 914 | "louvres": "louvers", 915 | "lustre": "luster", 916 | "magnetise": "magnetize", 917 | "magnetised": "magnetized", 918 | "magnetises": "magnetizes", 919 | "magnetising": "magnetizing", 920 | "manoeuvrability": "maneuverability", 921 | "manoeuvrable": "maneuverable", 922 | "manoeuvre": "maneuver", 923 | "manoeuvred": "maneuvered", 924 | "manoeuvres": "maneuvers", 925 | "manoeuvring": "maneuvering", 926 | "manoeuvrings": "maneuverings", 927 | "marginalisation": "marginalization", 928 | "marginalise": "marginalize", 929 | "marginalised": "marginalized", 930 | "marginalises": "marginalizes", 931 | "marginalising": "marginalizing", 932 | "marshalled": "marshaled", 933 | "marshalling": "marshaling", 934 | "marvelled": "marveled", 935 | "marvelling": "marveling", 936 | "marvellous": "marvelous", 937 | "marvellously": "marvelously", 938 | "materialisation": "materialization", 939 | "materialise": "materialize", 940 | "materialised": "materialized", 941 | "materialises": "materializes", 942 | "materialising": "materializing", 943 | "maximisation": "maximization", 944 | "maximise": "maximize", 945 | "maximised": "maximized", 946 | "maximises": "maximizes", 947 | "maximising": "maximizing", 948 | "meagre": "meager", 949 | "mechanisation": "mechanization", 950 | "mechanise": "mechanize", 951 | "mechanised": "mechanized", 952 | "mechanises": "mechanizes", 953 | "mechanising": "mechanizing", 954 | "mediaeval": "medieval", 955 | "memorialise": "memorialize", 956 | "memorialised": "memorialized", 957 | "memorialises": "memorializes", 958 | "memorialising": "memorializing", 959 | "memorise": "memorize", 960 | "memorised": "memorized", 961 | "memorises": "memorizes", 962 | "memorising": "memorizing", 963 | "mesmerise": "mesmerize", 964 | "mesmerised": "mesmerized", 965 | "mesmerises": "mesmerizes", 966 | "mesmerising": "mesmerizing", 967 | "metabolise": "metabolize", 968 | "metabolised": "metabolized", 969 | "metabolises": "metabolizes", 970 | "metabolising": "metabolizing", 971 | "metre": "meter", 972 | "metres": "meters", 973 | "mhm": "hmm", 974 | "micrometre": "micrometer", 975 | "micrometres": "micrometers", 976 | "militarise": "militarize", 977 | "militarised": "militarized", 978 | "militarises": "militarizes", 979 | "militarising": "militarizing", 980 | "milligramme": "milligram", 981 | "milligrammes": "milligrams", 982 | "millilitre": "milliliter", 983 | "millilitres": "milliliters", 984 | "millimetre": "millimeter", 985 | "millimetres": "millimeters", 
986 | "miniaturisation": "miniaturization", 987 | "miniaturise": "miniaturize", 988 | "miniaturised": "miniaturized", 989 | "miniaturises": "miniaturizes", 990 | "miniaturising": "miniaturizing", 991 | "minibusses": "minibuses", 992 | "minimise": "minimize", 993 | "minimised": "minimized", 994 | "minimises": "minimizes", 995 | "minimising": "minimizing", 996 | "misbehaviour": "misbehavior", 997 | "misdemeanour": "misdemeanor", 998 | "misdemeanours": "misdemeanors", 999 | "misspelt": "misspelled", 1000 | "mitre": "miter", 1001 | "mitres": "miters", 1002 | "mm": "hmm", 1003 | "mmm": "hmm", 1004 | "mobilisation": "mobilization", 1005 | "mobilise": "mobilize", 1006 | "mobilised": "mobilized", 1007 | "mobilises": "mobilizes", 1008 | "mobilising": "mobilizing", 1009 | "modelled": "modeled", 1010 | "modeller": "modeler", 1011 | "modellers": "modelers", 1012 | "modelling": "modeling", 1013 | "modernise": "modernize", 1014 | "modernised": "modernized", 1015 | "modernises": "modernizes", 1016 | "modernising": "modernizing", 1017 | "moisturise": "moisturize", 1018 | "moisturised": "moisturized", 1019 | "moisturiser": "moisturizer", 1020 | "moisturisers": "moisturizers", 1021 | "moisturises": "moisturizes", 1022 | "moisturising": "moisturizing", 1023 | "monologue": "monolog", 1024 | "monologues": "monologs", 1025 | "monopolisation": "monopolization", 1026 | "monopolise": "monopolize", 1027 | "monopolised": "monopolized", 1028 | "monopolises": "monopolizes", 1029 | "monopolising": "monopolizing", 1030 | "moralise": "moralize", 1031 | "moralised": "moralized", 1032 | "moralises": "moralizes", 1033 | "moralising": "moralizing", 1034 | "motorised": "motorized", 1035 | "mould": "mold", 1036 | "moulded": "molded", 1037 | "moulder": "molder", 1038 | "mouldered": "moldered", 1039 | "mouldering": "moldering", 1040 | "moulders": "molders", 1041 | "mouldier": "moldier", 1042 | "mouldiest": "moldiest", 1043 | "moulding": "molding", 1044 | "mouldings": "moldings", 1045 | "moulds": "molds", 1046 | "mouldy": "moldy", 1047 | "moult": "molt", 1048 | "moulted": "molted", 1049 | "moulting": "molting", 1050 | "moults": "molts", 1051 | "moustache": "mustache", 1052 | "moustached": "mustached", 1053 | "moustaches": "mustaches", 1054 | "moustachioed": "mustachioed", 1055 | "multicoloured": "multicolored", 1056 | "nationalisation": "nationalization", 1057 | "nationalisations": "nationalizations", 1058 | "nationalise": "nationalize", 1059 | "nationalised": "nationalized", 1060 | "nationalises": "nationalizes", 1061 | "nationalising": "nationalizing", 1062 | "naturalisation": "naturalization", 1063 | "naturalise": "naturalize", 1064 | "naturalised": "naturalized", 1065 | "naturalises": "naturalizes", 1066 | "naturalising": "naturalizing", 1067 | "neighbour": "neighbor", 1068 | "neighbourhood": "neighborhood", 1069 | "neighbourhoods": "neighborhoods", 1070 | "neighbouring": "neighboring", 1071 | "neighbourliness": "neighborliness", 1072 | "neighbourly": "neighborly", 1073 | "neighbours": "neighbors", 1074 | "neutralisation": "neutralization", 1075 | "neutralise": "neutralize", 1076 | "neutralised": "neutralized", 1077 | "neutralises": "neutralizes", 1078 | "neutralising": "neutralizing", 1079 | "normalisation": "normalization", 1080 | "normalise": "normalize", 1081 | "normalised": "normalized", 1082 | "normalises": "normalizes", 1083 | "normalising": "normalizing", 1084 | "odour": "odor", 1085 | "odourless": "odorless", 1086 | "odours": "odors", 1087 | "oesophagus": "esophagus", 1088 | "oesophaguses": "esophaguses", 1089 | 
"oestrogen": "estrogen", 1090 | "offence": "offense", 1091 | "offences": "offenses", 1092 | "omelette": "omelet", 1093 | "omelettes": "omelets", 1094 | "optimise": "optimize", 1095 | "optimised": "optimized", 1096 | "optimises": "optimizes", 1097 | "optimising": "optimizing", 1098 | "organisation": "organization", 1099 | "organisational": "organizational", 1100 | "organisations": "organizations", 1101 | "organise": "organize", 1102 | "organised": "organized", 1103 | "organiser": "organizer", 1104 | "organisers": "organizers", 1105 | "organises": "organizes", 1106 | "organising": "organizing", 1107 | "orthopaedic": "orthopedic", 1108 | "orthopaedics": "orthopedics", 1109 | "ostracise": "ostracize", 1110 | "ostracised": "ostracized", 1111 | "ostracises": "ostracizes", 1112 | "ostracising": "ostracizing", 1113 | "outmanoeuvre": "outmaneuver", 1114 | "outmanoeuvred": "outmaneuvered", 1115 | "outmanoeuvres": "outmaneuvers", 1116 | "outmanoeuvring": "outmaneuvering", 1117 | "overemphasise": "overemphasize", 1118 | "overemphasised": "overemphasized", 1119 | "overemphasises": "overemphasizes", 1120 | "overemphasising": "overemphasizing", 1121 | "oxidisation": "oxidization", 1122 | "oxidise": "oxidize", 1123 | "oxidised": "oxidized", 1124 | "oxidises": "oxidizes", 1125 | "oxidising": "oxidizing", 1126 | "paederast": "pederast", 1127 | "paederasts": "pederasts", 1128 | "paediatric": "pediatric", 1129 | "paediatrician": "pediatrician", 1130 | "paediatricians": "pediatricians", 1131 | "paediatrics": "pediatrics", 1132 | "paedophile": "pedophile", 1133 | "paedophiles": "pedophiles", 1134 | "paedophilia": "pedophilia", 1135 | "palaeolithic": "paleolithic", 1136 | "palaeontologist": "paleontologist", 1137 | "palaeontologists": "paleontologists", 1138 | "palaeontology": "paleontology", 1139 | "panelled": "paneled", 1140 | "panelling": "paneling", 1141 | "panellist": "panelist", 1142 | "panellists": "panelists", 1143 | "paralyse": "paralyze", 1144 | "paralysed": "paralyzed", 1145 | "paralyses": "paralyzes", 1146 | "paralysing": "paralyzing", 1147 | "parcelled": "parceled", 1148 | "parcelling": "parceling", 1149 | "parlour": "parlor", 1150 | "parlours": "parlors", 1151 | "particularise": "particularize", 1152 | "particularised": "particularized", 1153 | "particularises": "particularizes", 1154 | "particularising": "particularizing", 1155 | "passivisation": "passivization", 1156 | "passivise": "passivize", 1157 | "passivised": "passivized", 1158 | "passivises": "passivizes", 1159 | "passivising": "passivizing", 1160 | "pasteurisation": "pasteurization", 1161 | "pasteurise": "pasteurize", 1162 | "pasteurised": "pasteurized", 1163 | "pasteurises": "pasteurizes", 1164 | "pasteurising": "pasteurizing", 1165 | "patronise": "patronize", 1166 | "patronised": "patronized", 1167 | "patronises": "patronizes", 1168 | "patronising": "patronizing", 1169 | "patronisingly": "patronizingly", 1170 | "pedalled": "pedaled", 1171 | "pedalling": "pedaling", 1172 | "pedestrianisation": "pedestrianization", 1173 | "pedestrianise": "pedestrianize", 1174 | "pedestrianised": "pedestrianized", 1175 | "pedestrianises": "pedestrianizes", 1176 | "pedestrianising": "pedestrianizing", 1177 | "penalise": "penalize", 1178 | "penalised": "penalized", 1179 | "penalises": "penalizes", 1180 | "penalising": "penalizing", 1181 | "pencilled": "penciled", 1182 | "pencilling": "penciling", 1183 | "personalise": "personalize", 1184 | "personalised": "personalized", 1185 | "personalises": "personalizes", 1186 | "personalising": "personalizing", 1187 | 
"pharmacopoeia": "pharmacopeia", 1188 | "pharmacopoeias": "pharmacopeias", 1189 | "philosophise": "philosophize", 1190 | "philosophised": "philosophized", 1191 | "philosophises": "philosophizes", 1192 | "philosophising": "philosophizing", 1193 | "philtre": "filter", 1194 | "philtres": "filters", 1195 | "phoney": "phony", 1196 | "plagiarise": "plagiarize", 1197 | "plagiarised": "plagiarized", 1198 | "plagiarises": "plagiarizes", 1199 | "plagiarising": "plagiarizing", 1200 | "plough": "plow", 1201 | "ploughed": "plowed", 1202 | "ploughing": "plowing", 1203 | "ploughman": "plowman", 1204 | "ploughmen": "plowmen", 1205 | "ploughs": "plows", 1206 | "ploughshare": "plowshare", 1207 | "ploughshares": "plowshares", 1208 | "polarisation": "polarization", 1209 | "polarise": "polarize", 1210 | "polarised": "polarized", 1211 | "polarises": "polarizes", 1212 | "polarising": "polarizing", 1213 | "politicisation": "politicization", 1214 | "politicise": "politicize", 1215 | "politicised": "politicized", 1216 | "politicises": "politicizes", 1217 | "politicising": "politicizing", 1218 | "popularisation": "popularization", 1219 | "popularise": "popularize", 1220 | "popularised": "popularized", 1221 | "popularises": "popularizes", 1222 | "popularising": "popularizing", 1223 | "pouffe": "pouf", 1224 | "pouffes": "poufs", 1225 | "practise": "practice", 1226 | "practised": "practiced", 1227 | "practises": "practices", 1228 | "practising": "practicing", 1229 | "praesidium": "presidium", 1230 | "praesidiums": "presidiums", 1231 | "pressurisation": "pressurization", 1232 | "pressurise": "pressurize", 1233 | "pressurised": "pressurized", 1234 | "pressurises": "pressurizes", 1235 | "pressurising": "pressurizing", 1236 | "pretence": "pretense", 1237 | "pretences": "pretenses", 1238 | "primaeval": "primeval", 1239 | "prioritisation": "prioritization", 1240 | "prioritise": "prioritize", 1241 | "prioritised": "prioritized", 1242 | "prioritises": "prioritizes", 1243 | "prioritising": "prioritizing", 1244 | "privatisation": "privatization", 1245 | "privatisations": "privatizations", 1246 | "privatise": "privatize", 1247 | "privatised": "privatized", 1248 | "privatises": "privatizes", 1249 | "privatising": "privatizing", 1250 | "professionalisation": "professionalization", 1251 | "professionalise": "professionalize", 1252 | "professionalised": "professionalized", 1253 | "professionalises": "professionalizes", 1254 | "professionalising": "professionalizing", 1255 | "programme": "program", 1256 | "programmes": "programs", 1257 | "prologue": "prolog", 1258 | "prologues": "prologs", 1259 | "propagandise": "propagandize", 1260 | "propagandised": "propagandized", 1261 | "propagandises": "propagandizes", 1262 | "propagandising": "propagandizing", 1263 | "proselytise": "proselytize", 1264 | "proselytised": "proselytized", 1265 | "proselytiser": "proselytizer", 1266 | "proselytisers": "proselytizers", 1267 | "proselytises": "proselytizes", 1268 | "proselytising": "proselytizing", 1269 | "psychoanalyse": "psychoanalyze", 1270 | "psychoanalysed": "psychoanalyzed", 1271 | "psychoanalyses": "psychoanalyzes", 1272 | "psychoanalysing": "psychoanalyzing", 1273 | "publicise": "publicize", 1274 | "publicised": "publicized", 1275 | "publicises": "publicizes", 1276 | "publicising": "publicizing", 1277 | "pulverisation": "pulverization", 1278 | "pulverise": "pulverize", 1279 | "pulverised": "pulverized", 1280 | "pulverises": "pulverizes", 1281 | "pulverising": "pulverizing", 1282 | "pummelled": "pummel", 1283 | "pummelling": "pummeled", 1284 | 
"pyjama": "pajama", 1285 | "pyjamas": "pajamas", 1286 | "pzazz": "pizzazz", 1287 | "quarrelled": "quarreled", 1288 | "quarrelling": "quarreling", 1289 | "radicalise": "radicalize", 1290 | "radicalised": "radicalized", 1291 | "radicalises": "radicalizes", 1292 | "radicalising": "radicalizing", 1293 | "rancour": "rancor", 1294 | "randomise": "randomize", 1295 | "randomised": "randomized", 1296 | "randomises": "randomizes", 1297 | "randomising": "randomizing", 1298 | "rationalisation": "rationalization", 1299 | "rationalisations": "rationalizations", 1300 | "rationalise": "rationalize", 1301 | "rationalised": "rationalized", 1302 | "rationalises": "rationalizes", 1303 | "rationalising": "rationalizing", 1304 | "ravelled": "raveled", 1305 | "ravelling": "raveling", 1306 | "realisable": "realizable", 1307 | "realisation": "realization", 1308 | "realisations": "realizations", 1309 | "realise": "realize", 1310 | "realised": "realized", 1311 | "realises": "realizes", 1312 | "realising": "realizing", 1313 | "recognisable": "recognizable", 1314 | "recognisably": "recognizably", 1315 | "recognisance": "recognizance", 1316 | "recognise": "recognize", 1317 | "recognised": "recognized", 1318 | "recognises": "recognizes", 1319 | "recognising": "recognizing", 1320 | "reconnoitre": "reconnoiter", 1321 | "reconnoitred": "reconnoitered", 1322 | "reconnoitres": "reconnoiters", 1323 | "reconnoitring": "reconnoitering", 1324 | "refuelled": "refueled", 1325 | "refuelling": "refueling", 1326 | "regularisation": "regularization", 1327 | "regularise": "regularize", 1328 | "regularised": "regularized", 1329 | "regularises": "regularizes", 1330 | "regularising": "regularizing", 1331 | "remodelled": "remodeled", 1332 | "remodelling": "remodeling", 1333 | "remould": "remold", 1334 | "remoulded": "remolded", 1335 | "remoulding": "remolding", 1336 | "remoulds": "remolds", 1337 | "reorganisation": "reorganization", 1338 | "reorganisations": "reorganizations", 1339 | "reorganise": "reorganize", 1340 | "reorganised": "reorganized", 1341 | "reorganises": "reorganizes", 1342 | "reorganising": "reorganizing", 1343 | "revelled": "reveled", 1344 | "reveller": "reveler", 1345 | "revellers": "revelers", 1346 | "revelling": "reveling", 1347 | "revitalise": "revitalize", 1348 | "revitalised": "revitalized", 1349 | "revitalises": "revitalizes", 1350 | "revitalising": "revitalizing", 1351 | "revolutionise": "revolutionize", 1352 | "revolutionised": "revolutionized", 1353 | "revolutionises": "revolutionizes", 1354 | "revolutionising": "revolutionizing", 1355 | "rhapsodise": "rhapsodize", 1356 | "rhapsodised": "rhapsodized", 1357 | "rhapsodises": "rhapsodizes", 1358 | "rhapsodising": "rhapsodizing", 1359 | "rigour": "rigor", 1360 | "rigours": "rigors", 1361 | "ritualised": "ritualized", 1362 | "rivalled": "rivaled", 1363 | "rivalling": "rivaling", 1364 | "romanticise": "romanticize", 1365 | "romanticised": "romanticized", 1366 | "romanticises": "romanticizes", 1367 | "romanticising": "romanticizing", 1368 | "rumour": "rumor", 1369 | "rumoured": "rumored", 1370 | "rumours": "rumors", 1371 | "sabre": "saber", 1372 | "sabres": "sabers", 1373 | "saltpetre": "saltpeter", 1374 | "sanitise": "sanitize", 1375 | "sanitised": "sanitized", 1376 | "sanitises": "sanitizes", 1377 | "sanitising": "sanitizing", 1378 | "satirise": "satirize", 1379 | "satirised": "satirized", 1380 | "satirises": "satirizes", 1381 | "satirising": "satirizing", 1382 | "saviour": "savior", 1383 | "saviours": "saviors", 1384 | "savour": "savor", 1385 | "savoured": "savored", 
1386 | "savouries": "savories", 1387 | "savouring": "savoring", 1388 | "savours": "savors", 1389 | "savoury": "savory", 1390 | "scandalise": "scandalize", 1391 | "scandalised": "scandalized", 1392 | "scandalises": "scandalizes", 1393 | "scandalising": "scandalizing", 1394 | "sceptic": "skeptic", 1395 | "sceptical": "skeptical", 1396 | "sceptically": "skeptically", 1397 | "scepticism": "skepticism", 1398 | "sceptics": "skeptics", 1399 | "sceptre": "scepter", 1400 | "sceptres": "scepters", 1401 | "scrutinise": "scrutinize", 1402 | "scrutinised": "scrutinized", 1403 | "scrutinises": "scrutinizes", 1404 | "scrutinising": "scrutinizing", 1405 | "secularisation": "secularization", 1406 | "secularise": "secularize", 1407 | "secularised": "secularized", 1408 | "secularises": "secularizes", 1409 | "secularising": "secularizing", 1410 | "sensationalise": "sensationalize", 1411 | "sensationalised": "sensationalized", 1412 | "sensationalises": "sensationalizes", 1413 | "sensationalising": "sensationalizing", 1414 | "sensitise": "sensitize", 1415 | "sensitised": "sensitized", 1416 | "sensitises": "sensitizes", 1417 | "sensitising": "sensitizing", 1418 | "sentimentalise": "sentimentalize", 1419 | "sentimentalised": "sentimentalized", 1420 | "sentimentalises": "sentimentalizes", 1421 | "sentimentalising": "sentimentalizing", 1422 | "sepulchre": "sepulcher", 1423 | "sepulchres": "sepulchers", 1424 | "serialisation": "serialization", 1425 | "serialisations": "serializations", 1426 | "serialise": "serialize", 1427 | "serialised": "serialized", 1428 | "serialises": "serializes", 1429 | "serialising": "serializing", 1430 | "sermonise": "sermonize", 1431 | "sermonised": "sermonized", 1432 | "sermonises": "sermonizes", 1433 | "sermonising": "sermonizing", 1434 | "sheikh": "sheik", 1435 | "shovelled": "shoveled", 1436 | "shovelling": "shoveling", 1437 | "shrivelled": "shriveled", 1438 | "shrivelling": "shriveling", 1439 | "signalise": "signalize", 1440 | "signalised": "signalized", 1441 | "signalises": "signalizes", 1442 | "signalising": "signalizing", 1443 | "signalled": "signaled", 1444 | "signalling": "signaling", 1445 | "smoulder": "smolder", 1446 | "smouldered": "smoldered", 1447 | "smouldering": "smoldering", 1448 | "smoulders": "smolders", 1449 | "snivelled": "sniveled", 1450 | "snivelling": "sniveling", 1451 | "snorkelled": "snorkeled", 1452 | "snorkelling": "snorkeling", 1453 | "snowplough": "snowplow", 1454 | "snowploughs": "snowplow", 1455 | "socialisation": "socialization", 1456 | "socialise": "socialize", 1457 | "socialised": "socialized", 1458 | "socialises": "socializes", 1459 | "socialising": "socializing", 1460 | "sodomise": "sodomize", 1461 | "sodomised": "sodomized", 1462 | "sodomises": "sodomizes", 1463 | "sodomising": "sodomizing", 1464 | "solemnise": "solemnize", 1465 | "solemnised": "solemnized", 1466 | "solemnises": "solemnizes", 1467 | "solemnising": "solemnizing", 1468 | "sombre": "somber", 1469 | "specialisation": "specialization", 1470 | "specialisations": "specializations", 1471 | "specialise": "specialize", 1472 | "specialised": "specialized", 1473 | "specialises": "specializes", 1474 | "specialising": "specializing", 1475 | "spectre": "specter", 1476 | "spectres": "specters", 1477 | "spiralled": "spiraled", 1478 | "spiralling": "spiraling", 1479 | "splendour": "splendor", 1480 | "splendours": "splendors", 1481 | "squirrelled": "squirreled", 1482 | "squirrelling": "squirreling", 1483 | "stabilisation": "stabilization", 1484 | "stabilise": "stabilize", 1485 | "stabilised": 
"stabilized", 1486 | "stabiliser": "stabilizer", 1487 | "stabilisers": "stabilizers", 1488 | "stabilises": "stabilizes", 1489 | "stabilising": "stabilizing", 1490 | "standardisation": "standardization", 1491 | "standardise": "standardize", 1492 | "standardised": "standardized", 1493 | "standardises": "standardizes", 1494 | "standardising": "standardizing", 1495 | "stencilled": "stenciled", 1496 | "stencilling": "stenciling", 1497 | "sterilisation": "sterilization", 1498 | "sterilisations": "sterilizations", 1499 | "sterilise": "sterilize", 1500 | "sterilised": "sterilized", 1501 | "steriliser": "sterilizer", 1502 | "sterilisers": "sterilizers", 1503 | "sterilises": "sterilizes", 1504 | "sterilising": "sterilizing", 1505 | "stigmatisation": "stigmatization", 1506 | "stigmatise": "stigmatize", 1507 | "stigmatised": "stigmatized", 1508 | "stigmatises": "stigmatizes", 1509 | "stigmatising": "stigmatizing", 1510 | "storey": "story", 1511 | "storeys": "stories", 1512 | "subsidisation": "subsidization", 1513 | "subsidise": "subsidize", 1514 | "subsidised": "subsidized", 1515 | "subsidiser": "subsidizer", 1516 | "subsidisers": "subsidizers", 1517 | "subsidises": "subsidizes", 1518 | "subsidising": "subsidizing", 1519 | "succour": "succor", 1520 | "succoured": "succored", 1521 | "succouring": "succoring", 1522 | "succours": "succors", 1523 | "sulphate": "sulfate", 1524 | "sulphates": "sulfates", 1525 | "sulphide": "sulfide", 1526 | "sulphides": "sulfides", 1527 | "sulphur": "sulfur", 1528 | "sulphurous": "sulfurous", 1529 | "summarise": "summarize", 1530 | "summarised": "summarized", 1531 | "summarises": "summarizes", 1532 | "summarising": "summarizing", 1533 | "swivelled": "swiveled", 1534 | "swivelling": "swiveling", 1535 | "symbolise": "symbolize", 1536 | "symbolised": "symbolized", 1537 | "symbolises": "symbolizes", 1538 | "symbolising": "symbolizing", 1539 | "sympathise": "sympathize", 1540 | "sympathised": "sympathized", 1541 | "sympathiser": "sympathizer", 1542 | "sympathisers": "sympathizers", 1543 | "sympathises": "sympathizes", 1544 | "sympathising": "sympathizing", 1545 | "synchronisation": "synchronization", 1546 | "synchronise": "synchronize", 1547 | "synchronised": "synchronized", 1548 | "synchronises": "synchronizes", 1549 | "synchronising": "synchronizing", 1550 | "synthesise": "synthesize", 1551 | "synthesised": "synthesized", 1552 | "synthesiser": "synthesizer", 1553 | "synthesisers": "synthesizers", 1554 | "synthesises": "synthesizes", 1555 | "synthesising": "synthesizing", 1556 | "syphon": "siphon", 1557 | "syphoned": "siphoned", 1558 | "syphoning": "siphoning", 1559 | "syphons": "siphons", 1560 | "systematisation": "systematization", 1561 | "systematise": "systematize", 1562 | "systematised": "systematized", 1563 | "systematises": "systematizes", 1564 | "systematising": "systematizing", 1565 | "tantalise": "tantalize", 1566 | "tantalised": "tantalized", 1567 | "tantalises": "tantalizes", 1568 | "tantalising": "tantalizing", 1569 | "tantalisingly": "tantalizingly", 1570 | "tasselled": "tasseled", 1571 | "technicolour": "technicolor", 1572 | "temporise": "temporize", 1573 | "temporised": "temporized", 1574 | "temporises": "temporizes", 1575 | "temporising": "temporizing", 1576 | "tenderise": "tenderize", 1577 | "tenderised": "tenderized", 1578 | "tenderises": "tenderizes", 1579 | "tenderising": "tenderizing", 1580 | "terrorise": "terrorize", 1581 | "terrorised": "terrorized", 1582 | "terrorises": "terrorizes", 1583 | "terrorising": "terrorizing", 1584 | "theatre": "theater", 1585 
| "theatregoer": "theatergoer", 1586 | "theatregoers": "theatergoers", 1587 | "theatres": "theaters", 1588 | "theorise": "theorize", 1589 | "theorised": "theorized", 1590 | "theorises": "theorizes", 1591 | "theorising": "theorizing", 1592 | "tonne": "ton", 1593 | "tonnes": "tons", 1594 | "towelled": "toweled", 1595 | "towelling": "toweling", 1596 | "toxaemia": "toxemia", 1597 | "tranquillise": "tranquilize", 1598 | "tranquillised": "tranquilized", 1599 | "tranquilliser": "tranquilizer", 1600 | "tranquillisers": "tranquilizers", 1601 | "tranquillises": "tranquilizes", 1602 | "tranquillising": "tranquilizing", 1603 | "tranquillity": "tranquility", 1604 | "tranquillize": "tranquilize", 1605 | "tranquillized": "tranquilized", 1606 | "tranquillizer": "tranquilizer", 1607 | "tranquillizers": "tranquilizers", 1608 | "tranquillizes": "tranquilizes", 1609 | "tranquillizing": "tranquilizing", 1610 | "tranquilly": "tranquility", 1611 | "transistorised": "transistorized", 1612 | "traumatise": "traumatize", 1613 | "traumatised": "traumatized", 1614 | "traumatises": "traumatizes", 1615 | "traumatising": "traumatizing", 1616 | "travelled": "traveled", 1617 | "traveller": "traveler", 1618 | "travellers": "travelers", 1619 | "travelling": "traveling", 1620 | "travelog": "travelogue", 1621 | "travelogs": "travelogues", 1622 | "trialled": "trialed", 1623 | "trialling": "trialing", 1624 | "tricolour": "tricolor", 1625 | "tricolours": "tricolors", 1626 | "trivialise": "trivialize", 1627 | "trivialised": "trivialized", 1628 | "trivialises": "trivializes", 1629 | "trivialising": "trivializing", 1630 | "tumour": "tumor", 1631 | "tumours": "tumors", 1632 | "tunnelled": "tunneled", 1633 | "tunnelling": "tunneling", 1634 | "tyrannise": "tyrannize", 1635 | "tyrannised": "tyrannized", 1636 | "tyrannises": "tyrannizes", 1637 | "tyrannising": "tyrannizing", 1638 | "tyre": "tire", 1639 | "tyres": "tires", 1640 | "unauthorised": "unauthorized", 1641 | "uncivilised": "uncivilized", 1642 | "underutilised": "underutilized", 1643 | "unequalled": "unequaled", 1644 | "unfavourable": "unfavorable", 1645 | "unfavourably": "unfavorably", 1646 | "unionisation": "unionization", 1647 | "unionise": "unionize", 1648 | "unionised": "unionized", 1649 | "unionises": "unionizes", 1650 | "unionising": "unionizing", 1651 | "unorganised": "unorganized", 1652 | "unravelled": "unraveled", 1653 | "unravelling": "unraveling", 1654 | "unrecognisable": "unrecognizable", 1655 | "unrecognised": "unrecognized", 1656 | "unrivalled": "unrivaled", 1657 | "unsavoury": "unsavory", 1658 | "untrammelled": "untrammeled", 1659 | "urbanisation": "urbanization", 1660 | "urbanise": "urbanize", 1661 | "urbanised": "urbanized", 1662 | "urbanises": "urbanizes", 1663 | "urbanising": "urbanizing", 1664 | "utilisable": "utilizable", 1665 | "utilisation": "utilization", 1666 | "utilise": "utilize", 1667 | "utilised": "utilized", 1668 | "utilises": "utilizes", 1669 | "utilising": "utilizing", 1670 | "valour": "valor", 1671 | "vandalise": "vandalize", 1672 | "vandalised": "vandalized", 1673 | "vandalises": "vandalizes", 1674 | "vandalising": "vandalizing", 1675 | "vaporisation": "vaporization", 1676 | "vaporise": "vaporize", 1677 | "vaporised": "vaporized", 1678 | "vaporises": "vaporizes", 1679 | "vaporising": "vaporizing", 1680 | "vapour": "vapor", 1681 | "vapours": "vapors", 1682 | "verbalise": "verbalize", 1683 | "verbalised": "verbalized", 1684 | "verbalises": "verbalizes", 1685 | "verbalising": "verbalizing", 1686 | "victimisation": "victimization", 1687 | 
"victimise": "victimize", 1688 | "victimised": "victimized", 1689 | "victimises": "victimizes", 1690 | "victimising": "victimizing", 1691 | "videodisc": "videodisk", 1692 | "videodiscs": "videodisks", 1693 | "vigour": "vigor", 1694 | "visualisation": "visualization", 1695 | "visualisations": "visualizations", 1696 | "visualise": "visualize", 1697 | "visualised": "visualized", 1698 | "visualises": "visualizes", 1699 | "visualising": "visualizing", 1700 | "vocalisation": "vocalization", 1701 | "vocalisations": "vocalizations", 1702 | "vocalise": "vocalize", 1703 | "vocalised": "vocalized", 1704 | "vocalises": "vocalizes", 1705 | "vocalising": "vocalizing", 1706 | "vulcanised": "vulcanized", 1707 | "vulgarisation": "vulgarization", 1708 | "vulgarise": "vulgarize", 1709 | "vulgarised": "vulgarized", 1710 | "vulgarises": "vulgarizes", 1711 | "vulgarising": "vulgarizing", 1712 | "waggon": "wagon", 1713 | "waggons": "wagons", 1714 | "watercolour": "watercolor", 1715 | "watercolours": "watercolors", 1716 | "weaselled": "weaseled", 1717 | "weaselling": "weaseling", 1718 | "westernisation": "westernization", 1719 | "westernise": "westernize", 1720 | "westernised": "westernized", 1721 | "westernises": "westernizes", 1722 | "westernising": "westernizing", 1723 | "womanise": "womanize", 1724 | "womanised": "womanized", 1725 | "womaniser": "womanizer", 1726 | "womanisers": "womanizers", 1727 | "womanises": "womanizes", 1728 | "womanising": "womanizing", 1729 | "woollen": "woolen", 1730 | "woollens": "woolens", 1731 | "woollies": "woolies", 1732 | "woolly": "wooly", 1733 | "worshipped": "worshiped", 1734 | "worshipper": "worshiper", 1735 | "worshipping": "worshiping", 1736 | "yodelled": "yodeled", 1737 | "yodelling": "yodeling", 1738 | "yoghourt": "yogurt", 1739 | "yoghourts": "yogurts", 1740 | "yoghurt": "yogurt", 1741 | "yoghurts": "yogurts" 1742 | } 1743 | -------------------------------------------------------------------------------- /benchmark/requirements.benchmark.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | jiwer 3 | datasets 4 | memory_profiler 5 | py3nvml 6 | pytubefix 7 | -------------------------------------------------------------------------------- /benchmark/speed_benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import timeit 3 | 4 | from typing import Callable 5 | 6 | from utils import inference 7 | 8 | parser = argparse.ArgumentParser(description="Speed benchmark") 9 | parser.add_argument( 10 | "--repeat", 11 | type=int, 12 | default=3, 13 | help="Times an experiment will be run.", 14 | ) 15 | args = parser.parse_args() 16 | 17 | 18 | def measure_speed(func: Callable[[], None]): 19 | # as written in https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat, 20 | # min should be taken rather than the average 21 | runtimes = timeit.repeat( 22 | func, 23 | repeat=args.repeat, 24 | number=10, 25 | ) 26 | print(runtimes) 27 | print("Min execution time: %.3fs" % (min(runtimes) / 10.0)) 28 | 29 | 30 | if __name__ == "__main__": 31 | measure_speed(inference) 32 | -------------------------------------------------------------------------------- /benchmark/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from threading import Thread 4 | from typing import Optional 5 | 6 | from faster_whisper import WhisperModel 7 | 8 | model_path = "large-v3" 9 | model = WhisperModel(model_path, 
device="cuda") 10 | 11 | 12 | def inference(): 13 | segments, info = model.transcribe("benchmark.m4a", language="fr") 14 | for segment in segments: 15 | print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) 16 | 17 | 18 | def get_logger(name: Optional[str] = None) -> logging.Logger: 19 | formatter = logging.Formatter("%(levelname)s: %(message)s") 20 | logger = logging.getLogger(name) 21 | logger.setLevel(logging.DEBUG) 22 | handler = logging.StreamHandler() 23 | handler.setFormatter(formatter) 24 | logger.addHandler(handler) 25 | return logger 26 | 27 | 28 | class MyThread(Thread): 29 | def __init__(self, func, params): 30 | super(MyThread, self).__init__() 31 | self.func = func 32 | self.params = params 33 | self.result = None 34 | 35 | def run(self): 36 | self.result = self.func(*self.params) 37 | 38 | def get_result(self): 39 | return self.result 40 | -------------------------------------------------------------------------------- /benchmark/wer_benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | from datasets import load_dataset 6 | from jiwer import wer 7 | from tqdm import tqdm 8 | from transformers.models.whisper.english_normalizer import EnglishTextNormalizer 9 | 10 | from faster_whisper import WhisperModel 11 | 12 | parser = argparse.ArgumentParser(description="WER benchmark") 13 | parser.add_argument( 14 | "--audio_numb", 15 | type=int, 16 | default=None, 17 | help="Specify the number of validation audio files in the dataset." 18 | " Set to None to retrieve all audio files.", 19 | ) 20 | args = parser.parse_args() 21 | 22 | model_path = "large-v3" 23 | model = WhisperModel(model_path, device="cuda") 24 | 25 | # load the dataset with streaming mode 26 | dataset = load_dataset("librispeech_asr", "clean", split="validation", streaming=True) 27 | 28 | with open(os.path.join(os.path.dirname(__file__), "normalizer.json"), "r") as f: 29 | normalizer = EnglishTextNormalizer(json.load(f)) 30 | 31 | 32 | def inference(batch): 33 | batch["transcription"] = [] 34 | for sample in batch["audio"]: 35 | segments, info = model.transcribe(sample["array"], language="en") 36 | batch["transcription"].append("".join([segment.text for segment in segments])) 37 | batch["reference"] = batch["text"] 38 | return batch 39 | 40 | 41 | dataset = dataset.map(function=inference, batched=True, batch_size=16) 42 | 43 | all_transcriptions = [] 44 | all_references = [] 45 | 46 | # iterate over the dataset and run inference 47 | for i, result in tqdm(enumerate(dataset), desc="Evaluating..."): 48 | all_transcriptions.append(result["transcription"]) 49 | all_references.append(result["reference"]) 50 | if args.audio_numb and i == (args.audio_numb - 1): 51 | break 52 | 53 | # normalize predictions and references 54 | all_transcriptions = [normalizer(transcription) for transcription in all_transcriptions] 55 | all_references = [normalizer(reference) for reference in all_references] 56 | 57 | # compute the WER metric 58 | word_error_rate = 100 * wer(hypothesis=all_transcriptions, reference=all_references) 59 | print("WER: %.3f" % word_error_rate) 60 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 2 | WORKDIR /root 3 | RUN apt-get update -y && apt-get install -y python3-pip 4 | COPY infer.py jfk.flac ./ 5 | RUN pip3 
install faster-whisper 6 | CMD ["python3", "infer.py"] 7 | -------------------------------------------------------------------------------- /docker/infer.py: -------------------------------------------------------------------------------- 1 | from faster_whisper import WhisperModel 2 | 3 | jfk_path = "jfk.flac" 4 | model = WhisperModel("tiny", device="cuda") 5 | segments, info = model.transcribe(jfk_path, word_timestamps=True) 6 | for segment in segments: 7 | print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) 8 | -------------------------------------------------------------------------------- /docker/jfk.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/docker/jfk.flac -------------------------------------------------------------------------------- /faster_whisper/__init__.py: -------------------------------------------------------------------------------- 1 | from faster_whisper.audio import decode_audio 2 | from faster_whisper.transcribe import BatchedInferencePipeline, WhisperModel 3 | from faster_whisper.utils import available_models, download_model, format_timestamp 4 | from faster_whisper.version import __version__ 5 | 6 | __all__ = [ 7 | "available_models", 8 | "decode_audio", 9 | "WhisperModel", 10 | "BatchedInferencePipeline", 11 | "download_model", 12 | "format_timestamp", 13 | "__version__", 14 | ] 15 | -------------------------------------------------------------------------------- /faster_whisper/assets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/faster_whisper/assets/__init__.py -------------------------------------------------------------------------------- /faster_whisper/assets/silero_decoder_v5.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/faster_whisper/assets/silero_decoder_v5.onnx -------------------------------------------------------------------------------- /faster_whisper/assets/silero_encoder_v5.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/faster_whisper/assets/silero_encoder_v5.onnx -------------------------------------------------------------------------------- /faster_whisper/audio.py: -------------------------------------------------------------------------------- 1 | """We use the PyAV library to decode the audio: https://github.com/PyAV-Org/PyAV 2 | 3 | The advantage of PyAV is that it bundles the FFmpeg libraries so there is no additional 4 | system dependencies. FFmpeg does not need to be installed on the system. 5 | 6 | However, the API is quite low-level so we need to manipulate audio frames directly. 7 | """ 8 | 9 | import gc 10 | import io 11 | import itertools 12 | 13 | from typing import BinaryIO, Union 14 | 15 | import av 16 | import numpy as np 17 | 18 | 19 | def decode_audio( 20 | input_file: Union[str, BinaryIO], 21 | sampling_rate: int = 16000, 22 | split_stereo: bool = False, 23 | ): 24 | """Decodes the audio. 25 | 26 | Args: 27 | input_file: Path to the input file or a file-like object. 28 | sampling_rate: Resample the audio to this sample rate. 
29 | split_stereo: Return separate left and right channels. 30 | 31 | Returns: 32 | A float32 Numpy array. 33 | 34 | If `split_stereo` is enabled, the function returns a 2-tuple with the 35 | separated left and right channels. 36 | """ 37 | resampler = av.audio.resampler.AudioResampler( 38 | format="s16", 39 | layout="mono" if not split_stereo else "stereo", 40 | rate=sampling_rate, 41 | ) 42 | 43 | raw_buffer = io.BytesIO() 44 | dtype = None 45 | 46 | with av.open(input_file, mode="r", metadata_errors="ignore") as container: 47 | frames = container.decode(audio=0) 48 | frames = _ignore_invalid_frames(frames) 49 | frames = _group_frames(frames, 500000) 50 | frames = _resample_frames(frames, resampler) 51 | 52 | for frame in frames: 53 | array = frame.to_ndarray() 54 | dtype = array.dtype 55 | raw_buffer.write(array) 56 | 57 | # It appears that some objects related to the resampler are not freed 58 | # unless the garbage collector is manually run. 59 | # https://github.com/SYSTRAN/faster-whisper/issues/390 60 | # note that this slows down loading the audio a little bit 61 | # if that is a concern, please use ffmpeg directly as in here: 62 | # https://github.com/openai/whisper/blob/25639fc/whisper/audio.py#L25-L62 63 | del resampler 64 | gc.collect() 65 | 66 | audio = np.frombuffer(raw_buffer.getbuffer(), dtype=dtype) 67 | 68 | # Convert s16 back to f32. 69 | audio = audio.astype(np.float32) / 32768.0 70 | 71 | if split_stereo: 72 | left_channel = audio[0::2] 73 | right_channel = audio[1::2] 74 | return left_channel, right_channel 75 | 76 | return audio 77 | 78 | 79 | def _ignore_invalid_frames(frames): 80 | iterator = iter(frames) 81 | 82 | while True: 83 | try: 84 | yield next(iterator) 85 | except StopIteration: 86 | break 87 | except av.error.InvalidDataError: 88 | continue 89 | 90 | 91 | def _group_frames(frames, num_samples=None): 92 | fifo = av.audio.fifo.AudioFifo() 93 | 94 | for frame in frames: 95 | frame.pts = None # Ignore timestamp check. 96 | fifo.write(frame) 97 | 98 | if num_samples is not None and fifo.samples >= num_samples: 99 | yield fifo.read() 100 | 101 | if fifo.samples > 0: 102 | yield fifo.read() 103 | 104 | 105 | def _resample_frames(frames, resampler): 106 | # Add None to flush the resampler. 107 | for frame in itertools.chain(frames, [None]): 108 | yield from resampler.resample(frame) 109 | 110 | 111 | def pad_or_trim(array, length: int = 3000, *, axis: int = -1): 112 | """ 113 | Pad or trim the Mel features array to 3000, as expected by the encoder. 
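For example (an illustrative sketch, assuming the usual (80, n_frames) Mel layout): an (80, 2500) array is zero-padded on the right to (80, 3000), while an (80, 3200) array is trimmed to its first 3000 frames along `axis`, so that afterwards array.shape[axis] == length.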
114 | """ 115 | if array.shape[axis] > length: 116 | array = array.take(indices=range(length), axis=axis) 117 | 118 | if array.shape[axis] < length: 119 | pad_widths = [(0, 0)] * array.ndim 120 | pad_widths[axis] = (0, length - array.shape[axis]) 121 | array = np.pad(array, pad_widths) 122 | 123 | return array 124 | -------------------------------------------------------------------------------- /faster_whisper/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class FeatureExtractor: 5 | def __init__( 6 | self, 7 | feature_size=80, 8 | sampling_rate=16000, 9 | hop_length=160, 10 | chunk_length=30, 11 | n_fft=400, 12 | ): 13 | self.n_fft = n_fft 14 | self.hop_length = hop_length 15 | self.chunk_length = chunk_length 16 | self.n_samples = chunk_length * sampling_rate 17 | self.nb_max_frames = self.n_samples // hop_length 18 | self.time_per_frame = hop_length / sampling_rate 19 | self.sampling_rate = sampling_rate 20 | self.mel_filters = self.get_mel_filters( 21 | sampling_rate, n_fft, n_mels=feature_size 22 | ).astype("float32") 23 | 24 | @staticmethod 25 | def get_mel_filters(sr, n_fft, n_mels=128): 26 | # Initialize the weights 27 | n_mels = int(n_mels) 28 | 29 | # Center freqs of each FFT bin 30 | fftfreqs = np.fft.rfftfreq(n=n_fft, d=1.0 / sr) 31 | 32 | # 'Center freqs' of mel bands - uniformly spaced between limits 33 | min_mel = 0.0 34 | max_mel = 45.245640471924965 35 | 36 | mels = np.linspace(min_mel, max_mel, n_mels + 2) 37 | 38 | # Fill in the linear scale 39 | f_min = 0.0 40 | f_sp = 200.0 / 3 41 | freqs = f_min + f_sp * mels 42 | 43 | # And now the nonlinear scale 44 | min_log_hz = 1000.0 # beginning of log region (Hz) 45 | min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels) 46 | logstep = np.log(6.4) / 27.0 # step size for log region 47 | 48 | # If we have vector data, vectorize 49 | log_t = mels >= min_log_mel 50 | freqs[log_t] = min_log_hz * np.exp(logstep * (mels[log_t] - min_log_mel)) 51 | 52 | fdiff = np.diff(freqs) 53 | ramps = freqs.reshape(-1, 1) - fftfreqs.reshape(1, -1) 54 | 55 | lower = -ramps[:-2] / np.expand_dims(fdiff[:-1], axis=1) 56 | upper = ramps[2:] / np.expand_dims(fdiff[1:], axis=1) 57 | 58 | # Intersect them with each other and zero, vectorized across all i 59 | weights = np.maximum(np.zeros_like(lower), np.minimum(lower, upper)) 60 | 61 | # Slaney-style mel is scaled to be approx constant energy per channel 62 | enorm = 2.0 / (freqs[2 : n_mels + 2] - freqs[:n_mels]) 63 | weights *= np.expand_dims(enorm, axis=1) 64 | 65 | return weights 66 | 67 | @staticmethod 68 | def stft( 69 | input_array: np.ndarray, 70 | n_fft: int, 71 | hop_length: int = None, 72 | win_length: int = None, 73 | window: np.ndarray = None, 74 | center: bool = True, 75 | mode: str = "reflect", 76 | normalized: bool = False, 77 | onesided: bool = None, 78 | return_complex: bool = None, 79 | ): 80 | # Default initialization for hop_length and win_length 81 | hop_length = hop_length if hop_length is not None else n_fft // 4 82 | win_length = win_length if win_length is not None else n_fft 83 | input_is_complex = np.iscomplexobj(input_array) 84 | 85 | # Determine if the output should be complex 86 | return_complex = ( 87 | return_complex 88 | if return_complex is not None 89 | else (input_is_complex or (window is not None and np.iscomplexobj(window))) 90 | ) 91 | 92 | if not return_complex and return_complex is None: 93 | raise ValueError( 94 | "stft requires the return_complex parameter for real 
inputs." 95 | ) 96 | 97 | # Input checks 98 | if not np.issubdtype(input_array.dtype, np.floating) and not input_is_complex: 99 | raise ValueError( 100 | "stft: expected an array of floating point or complex values," 101 | f" got {input_array.dtype}" 102 | ) 103 | 104 | if input_array.ndim > 2 or input_array.ndim < 1: 105 | raise ValueError( 106 | f"stft: expected a 1D or 2D array, but got {input_array.ndim}D array" 107 | ) 108 | 109 | # Handle 1D input 110 | if input_array.ndim == 1: 111 | input_array = np.expand_dims(input_array, axis=0) 112 | input_array_1d = True 113 | else: 114 | input_array_1d = False 115 | 116 | # Center padding if required 117 | if center: 118 | pad_amount = n_fft // 2 119 | input_array = np.pad( 120 | input_array, ((0, 0), (pad_amount, pad_amount)), mode=mode 121 | ) 122 | 123 | batch, length = input_array.shape 124 | 125 | # Additional input checks 126 | if n_fft <= 0 or n_fft > length: 127 | raise ValueError( 128 | f"stft: expected 0 < n_fft <= {length}, but got n_fft={n_fft}" 129 | ) 130 | 131 | if hop_length <= 0: 132 | raise ValueError( 133 | f"stft: expected hop_length > 0, but got hop_length={hop_length}" 134 | ) 135 | 136 | if win_length <= 0 or win_length > n_fft: 137 | raise ValueError( 138 | f"stft: expected 0 < win_length <= n_fft, but got win_length={win_length}" 139 | ) 140 | 141 | if window is not None: 142 | if window.ndim != 1 or window.shape[0] != win_length: 143 | raise ValueError( 144 | f"stft: expected a 1D window array of size equal to win_length={win_length}, " 145 | f"but got window with size {window.shape}" 146 | ) 147 | 148 | # Handle padding of the window if necessary 149 | if win_length < n_fft: 150 | left = (n_fft - win_length) // 2 151 | window_ = np.zeros(n_fft, dtype=window.dtype) 152 | window_[left : left + win_length] = window 153 | else: 154 | window_ = window 155 | 156 | # Calculate the number of frames 157 | n_frames = 1 + (length - n_fft) // hop_length 158 | 159 | # Time to columns 160 | input_array = np.lib.stride_tricks.as_strided( 161 | input_array, 162 | (batch, n_frames, n_fft), 163 | ( 164 | input_array.strides[0], 165 | hop_length * input_array.strides[1], 166 | input_array.strides[1], 167 | ), 168 | ) 169 | 170 | if window_ is not None: 171 | input_array = input_array * window_ 172 | 173 | # FFT and transpose 174 | complex_fft = input_is_complex 175 | onesided = onesided if onesided is not None else not complex_fft 176 | 177 | if normalized: 178 | norm = "ortho" 179 | else: 180 | norm = None 181 | 182 | if complex_fft: 183 | if onesided: 184 | raise ValueError( 185 | "Cannot have onesided output if window or input is complex" 186 | ) 187 | output = np.fft.fft(input_array, n=n_fft, axis=-1, norm=norm) 188 | else: 189 | output = np.fft.rfft(input_array, n=n_fft, axis=-1, norm=norm) 190 | 191 | output = output.transpose((0, 2, 1)) 192 | 193 | if input_array_1d: 194 | output = output.squeeze(0) 195 | 196 | return output if return_complex else np.real(output) 197 | 198 | def __call__(self, waveform: np.ndarray, padding=160, chunk_length=None): 199 | """ 200 | Compute the log-Mel spectrogram of the provided audio. 
201 | """ 202 | 203 | if chunk_length is not None: 204 | self.n_samples = chunk_length * self.sampling_rate 205 | self.nb_max_frames = self.n_samples // self.hop_length 206 | 207 | if waveform.dtype is not np.float32: 208 | waveform = waveform.astype(np.float32) 209 | 210 | if padding: 211 | waveform = np.pad(waveform, (0, padding)) 212 | 213 | window = np.hanning(self.n_fft + 1)[:-1].astype("float32") 214 | 215 | stft = self.stft( 216 | waveform, 217 | self.n_fft, 218 | self.hop_length, 219 | window=window, 220 | return_complex=True, 221 | ).astype("complex64") 222 | magnitudes = np.abs(stft[..., :-1]) ** 2 223 | 224 | mel_spec = self.mel_filters @ magnitudes 225 | 226 | log_spec = np.log10(np.clip(mel_spec, a_min=1e-10, a_max=None)) 227 | log_spec = np.maximum(log_spec, log_spec.max() - 8.0) 228 | log_spec = (log_spec + 4.0) / 4.0 229 | 230 | return log_spec 231 | -------------------------------------------------------------------------------- /faster_whisper/tokenizer.py: -------------------------------------------------------------------------------- 1 | import string 2 | 3 | from functools import cached_property 4 | from typing import List, Optional, Tuple 5 | 6 | import tokenizers 7 | 8 | 9 | class Tokenizer: 10 | """Simple wrapper around a tokenizers.Tokenizer.""" 11 | 12 | def __init__( 13 | self, 14 | tokenizer: tokenizers.Tokenizer, 15 | multilingual: bool, 16 | task: Optional[str] = None, 17 | language: Optional[str] = None, 18 | ): 19 | self.tokenizer = tokenizer 20 | 21 | if multilingual: 22 | if task not in _TASKS: 23 | raise ValueError( 24 | "'%s' is not a valid task (accepted tasks: %s)" 25 | % (task, ", ".join(_TASKS)) 26 | ) 27 | 28 | if language not in _LANGUAGE_CODES: 29 | raise ValueError( 30 | "'%s' is not a valid language code (accepted language codes: %s)" 31 | % (language, ", ".join(_LANGUAGE_CODES)) 32 | ) 33 | 34 | self.task = self.tokenizer.token_to_id("<|%s|>" % task) 35 | self.language = self.tokenizer.token_to_id("<|%s|>" % language) 36 | self.language_code = language 37 | else: 38 | self.task = None 39 | self.language = None 40 | self.language_code = "en" 41 | 42 | @cached_property 43 | def transcribe(self) -> int: 44 | return self.tokenizer.token_to_id("<|transcribe|>") 45 | 46 | @cached_property 47 | def translate(self) -> int: 48 | return self.tokenizer.token_to_id("<|translate|>") 49 | 50 | @cached_property 51 | def sot(self) -> int: 52 | return self.tokenizer.token_to_id("<|startoftranscript|>") 53 | 54 | @cached_property 55 | def sot_lm(self) -> int: 56 | return self.tokenizer.token_to_id("<|startoflm|>") 57 | 58 | @cached_property 59 | def sot_prev(self) -> int: 60 | return self.tokenizer.token_to_id("<|startofprev|>") 61 | 62 | @cached_property 63 | def eot(self) -> int: 64 | return self.tokenizer.token_to_id("<|endoftext|>") 65 | 66 | @cached_property 67 | def no_timestamps(self) -> int: 68 | return self.tokenizer.token_to_id("<|notimestamps|>") 69 | 70 | @property 71 | def timestamp_begin(self) -> int: 72 | return self.no_timestamps + 1 73 | 74 | @property 75 | def sot_sequence(self) -> List[int]: 76 | sequence = [self.sot] 77 | 78 | if self.language is not None: 79 | sequence.append(self.language) 80 | 81 | if self.task is not None: 82 | sequence.append(self.task) 83 | 84 | return sequence 85 | 86 | def encode(self, text: str) -> List[int]: 87 | return self.tokenizer.encode(text, add_special_tokens=False).ids 88 | 89 | def decode(self, tokens: List[int]) -> str: 90 | text_tokens = [token for token in tokens if token < self.eot] 91 | return 
self.tokenizer.decode(text_tokens) 92 | 93 | def decode_with_timestamps(self, tokens: List[int]) -> str: 94 | outputs = [[]] 95 | 96 | for token in tokens: 97 | if token >= self.timestamp_begin: 98 | timestamp = f"<|{(token - self.timestamp_begin) * 0.02:.2f}|>" 99 | outputs.append(timestamp) 100 | outputs.append([]) 101 | else: 102 | outputs[-1].append(token) 103 | 104 | return "".join( 105 | [s if isinstance(s, str) else self.tokenizer.decode(s) for s in outputs] 106 | ) 107 | 108 | @cached_property 109 | def non_speech_tokens(self) -> Tuple[int]: 110 | """ 111 | Returns the list of tokens to suppress in order to avoid any speaker tags or non-speech 112 | annotations, to prevent sampling texts that are not actually spoken in the audio, e.g. 113 | 114 | - ♪♪♪ 115 | - ( SPEAKING FOREIGN LANGUAGE ) 116 | - [DAVID] Hey there, 117 | 118 | keeping basic punctuations like commas, periods, question marks, exclamation points, etc. 119 | """ 120 | symbols = list('"#()*+/:;<=>@[\\]^_`{|}~「」『』') 121 | symbols += ( 122 | "<< >> <<< >>> -- --- -( -[ (' (\" (( )) ((( ))) [[ ]] {{ }} ♪♪ ♪♪♪".split() 123 | ) 124 | 125 | # symbols that may be a single token or multiple tokens depending on the tokenizer. 126 | # In case they're multiple tokens, suppress the first token, which is safe because: 127 | # These are between U+2640 and U+267F miscellaneous symbols that are okay to suppress 128 | # in generations, and in the 3-byte UTF-8 representation they share the first two bytes. 129 | miscellaneous = set("♩♪♫♬♭♮♯") 130 | assert all(0x2640 <= ord(c) <= 0x267F for c in miscellaneous) 131 | 132 | # allow hyphens "-" and single quotes "'" between words, but not at the beginning of a word 133 | result = {self.encode(" -")[0], self.encode(" '")[0]} 134 | for symbol in symbols + list(miscellaneous): 135 | for tokens in [ 136 | self.encode(symbol), 137 | self.encode(" " + symbol), 138 | ]: 139 | if len(tokens) == 1 or symbol in miscellaneous: 140 | result.add(tokens[0]) 141 | 142 | return tuple(sorted(result)) 143 | 144 | def split_to_word_tokens( 145 | self, tokens: List[int] 146 | ) -> Tuple[List[str], List[List[int]]]: 147 | if self.language_code in {"zh", "ja", "th", "lo", "my", "yue"}: 148 | # These languages don't typically use spaces, so it is difficult to split words 149 | # without morpheme analysis. 
Here, we instead split words at any 150 | # position where the tokens are decoded as valid unicode points 151 | return self.split_tokens_on_unicode(tokens) 152 | 153 | return self.split_tokens_on_spaces(tokens) 154 | 155 | def split_tokens_on_unicode( 156 | self, tokens: List[int] 157 | ) -> Tuple[List[str], List[List[int]]]: 158 | decoded_full = self.decode_with_timestamps(tokens) 159 | replacement_char = "\ufffd" 160 | 161 | words = [] 162 | word_tokens = [] 163 | current_tokens = [] 164 | unicode_offset = 0 165 | 166 | for token in tokens: 167 | current_tokens.append(token) 168 | decoded = self.decode_with_timestamps(current_tokens) 169 | 170 | try: 171 | replacement_char_index = decoded.index(replacement_char) 172 | replacement_char_index += unicode_offset 173 | except ValueError: 174 | replacement_char_index = None 175 | 176 | if replacement_char_index is None or ( 177 | replacement_char_index < len(decoded_full) 178 | and decoded_full[replacement_char_index] == replacement_char 179 | ): 180 | words.append(decoded) 181 | word_tokens.append(current_tokens) 182 | current_tokens = [] 183 | unicode_offset += len(decoded) 184 | 185 | return words, word_tokens 186 | 187 | def split_tokens_on_spaces( 188 | self, tokens: List[int] 189 | ) -> Tuple[List[str], List[List[int]]]: 190 | subwords, subword_tokens_list = self.split_tokens_on_unicode(tokens) 191 | words = [] 192 | word_tokens = [] 193 | 194 | for subword, subword_tokens in zip(subwords, subword_tokens_list): 195 | special = subword_tokens[0] >= self.eot 196 | with_space = subword.startswith(" ") 197 | punctuation = subword.strip() in string.punctuation 198 | if special or with_space or punctuation or len(words) == 0: 199 | words.append(subword) 200 | word_tokens.append(subword_tokens) 201 | else: 202 | words[-1] = words[-1] + subword 203 | word_tokens[-1].extend(subword_tokens) 204 | 205 | return words, word_tokens 206 | 207 | 208 | _TASKS = ( 209 | "transcribe", 210 | "translate", 211 | ) 212 | 213 | _LANGUAGE_CODES = ( 214 | "af", 215 | "am", 216 | "ar", 217 | "as", 218 | "az", 219 | "ba", 220 | "be", 221 | "bg", 222 | "bn", 223 | "bo", 224 | "br", 225 | "bs", 226 | "ca", 227 | "cs", 228 | "cy", 229 | "da", 230 | "de", 231 | "el", 232 | "en", 233 | "es", 234 | "et", 235 | "eu", 236 | "fa", 237 | "fi", 238 | "fo", 239 | "fr", 240 | "gl", 241 | "gu", 242 | "ha", 243 | "haw", 244 | "he", 245 | "hi", 246 | "hr", 247 | "ht", 248 | "hu", 249 | "hy", 250 | "id", 251 | "is", 252 | "it", 253 | "ja", 254 | "jw", 255 | "ka", 256 | "kk", 257 | "km", 258 | "kn", 259 | "ko", 260 | "la", 261 | "lb", 262 | "ln", 263 | "lo", 264 | "lt", 265 | "lv", 266 | "mg", 267 | "mi", 268 | "mk", 269 | "ml", 270 | "mn", 271 | "mr", 272 | "ms", 273 | "mt", 274 | "my", 275 | "ne", 276 | "nl", 277 | "nn", 278 | "no", 279 | "oc", 280 | "pa", 281 | "pl", 282 | "ps", 283 | "pt", 284 | "ro", 285 | "ru", 286 | "sa", 287 | "sd", 288 | "si", 289 | "sk", 290 | "sl", 291 | "sn", 292 | "so", 293 | "sq", 294 | "sr", 295 | "su", 296 | "sv", 297 | "sw", 298 | "ta", 299 | "te", 300 | "tg", 301 | "th", 302 | "tk", 303 | "tl", 304 | "tr", 305 | "tt", 306 | "uk", 307 | "ur", 308 | "uz", 309 | "vi", 310 | "yi", 311 | "yo", 312 | "zh", 313 | "yue", 314 | ) 315 | -------------------------------------------------------------------------------- /faster_whisper/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import re 4 | 5 | from typing import List, Optional, Union 6 | 7 | import huggingface_hub 8 | import 
requests 9 | 10 | from tqdm.auto import tqdm 11 | 12 | _MODELS = { 13 | "tiny.en": "Systran/faster-whisper-tiny.en", 14 | "tiny": "Systran/faster-whisper-tiny", 15 | "base.en": "Systran/faster-whisper-base.en", 16 | "base": "Systran/faster-whisper-base", 17 | "small.en": "Systran/faster-whisper-small.en", 18 | "small": "Systran/faster-whisper-small", 19 | "medium.en": "Systran/faster-whisper-medium.en", 20 | "medium": "Systran/faster-whisper-medium", 21 | "large-v1": "Systran/faster-whisper-large-v1", 22 | "large-v2": "Systran/faster-whisper-large-v2", 23 | "large-v3": "Systran/faster-whisper-large-v3", 24 | "large": "Systran/faster-whisper-large-v3", 25 | "distil-large-v2": "Systran/faster-distil-whisper-large-v2", 26 | "distil-medium.en": "Systran/faster-distil-whisper-medium.en", 27 | "distil-small.en": "Systran/faster-distil-whisper-small.en", 28 | "distil-large-v3": "Systran/faster-distil-whisper-large-v3", 29 | "distil-large-v3.5": "distil-whisper/distil-large-v3.5-ct2", 30 | "large-v3-turbo": "mobiuslabsgmbh/faster-whisper-large-v3-turbo", 31 | "turbo": "mobiuslabsgmbh/faster-whisper-large-v3-turbo", 32 | } 33 | 34 | 35 | def available_models() -> List[str]: 36 | """Returns the names of available models.""" 37 | return list(_MODELS.keys()) 38 | 39 | 40 | def get_assets_path(): 41 | """Returns the path to the assets directory.""" 42 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") 43 | 44 | 45 | def get_logger(): 46 | """Returns the module logger.""" 47 | return logging.getLogger("faster_whisper") 48 | 49 | 50 | def download_model( 51 | size_or_id: str, 52 | output_dir: Optional[str] = None, 53 | local_files_only: bool = False, 54 | cache_dir: Optional[str] = None, 55 | revision: Optional[str] = None, 56 | use_auth_token: Optional[Union[str, bool]] = None, 57 | ): 58 | """Downloads a CTranslate2 Whisper model from the Hugging Face Hub. 59 | 60 | Args: 61 | size_or_id: Size of the model to download from https://huggingface.co/Systran 62 | (tiny, tiny.en, base, base.en, small, small.en, distil-small.en, medium, medium.en, 63 | distil-medium.en, large-v1, large-v2, large-v3, large, distil-large-v2, 64 | distil-large-v3), or a CTranslate2-converted model ID from the Hugging Face Hub 65 | (e.g. Systran/faster-whisper-large-v3). 66 | output_dir: Directory where the model should be saved. If not set, the model is saved in 67 | the cache directory. 68 | local_files_only: If True, avoid downloading the file and return the path to the local 69 | cached file if it exists. 70 | cache_dir: Path to the folder where cached files are stored. 71 | revision: An optional Git revision id which can be a branch name, a tag, or a 72 | commit hash. 73 | use_auth_token: HuggingFace authentication token or True to use the 74 | token stored by the HuggingFace config folder. 75 | 76 | Returns: 77 | The path to the downloaded model. 78 | 79 | Raises: 80 | ValueError: if the model size is invalid. 
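        Example:
            An illustrative call only: the output directory name below is a placeholder,
            and network access is needed unless the files are already in the local cache.

                from faster_whisper import download_model

                # Fetch the converted "tiny" model and keep its files in a local directory.
                model_dir = download_model("tiny", output_dir="whisper-tiny-ct2")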
81 | """ 82 | if re.match(r".*/.*", size_or_id): 83 | repo_id = size_or_id 84 | else: 85 | repo_id = _MODELS.get(size_or_id) 86 | if repo_id is None: 87 | raise ValueError( 88 | "Invalid model size '%s', expected one of: %s" 89 | % (size_or_id, ", ".join(_MODELS.keys())) 90 | ) 91 | 92 | allow_patterns = [ 93 | "config.json", 94 | "preprocessor_config.json", 95 | "model.bin", 96 | "tokenizer.json", 97 | "vocabulary.*", 98 | ] 99 | 100 | kwargs = { 101 | "local_files_only": local_files_only, 102 | "allow_patterns": allow_patterns, 103 | "tqdm_class": disabled_tqdm, 104 | "revision": revision, 105 | } 106 | 107 | if output_dir is not None: 108 | kwargs["local_dir"] = output_dir 109 | kwargs["local_dir_use_symlinks"] = False 110 | 111 | if cache_dir is not None: 112 | kwargs["cache_dir"] = cache_dir 113 | 114 | if use_auth_token is not None: 115 | kwargs["token"] = use_auth_token 116 | 117 | try: 118 | return huggingface_hub.snapshot_download(repo_id, **kwargs) 119 | except ( 120 | huggingface_hub.utils.HfHubHTTPError, 121 | requests.exceptions.ConnectionError, 122 | ) as exception: 123 | logger = get_logger() 124 | logger.warning( 125 | "An error occured while synchronizing the model %s from the Hugging Face Hub:\n%s", 126 | repo_id, 127 | exception, 128 | ) 129 | logger.warning( 130 | "Trying to load the model directly from the local cache, if it exists." 131 | ) 132 | 133 | kwargs["local_files_only"] = True 134 | return huggingface_hub.snapshot_download(repo_id, **kwargs) 135 | 136 | 137 | def format_timestamp( 138 | seconds: float, 139 | always_include_hours: bool = False, 140 | decimal_marker: str = ".", 141 | ) -> str: 142 | assert seconds >= 0, "non-negative timestamp expected" 143 | milliseconds = round(seconds * 1000.0) 144 | 145 | hours = milliseconds // 3_600_000 146 | milliseconds -= hours * 3_600_000 147 | 148 | minutes = milliseconds // 60_000 149 | milliseconds -= minutes * 60_000 150 | 151 | seconds = milliseconds // 1_000 152 | milliseconds -= seconds * 1_000 153 | 154 | hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else "" 155 | return ( 156 | f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}" 157 | ) 158 | 159 | 160 | class disabled_tqdm(tqdm): 161 | def __init__(self, *args, **kwargs): 162 | kwargs["disable"] = True 163 | super().__init__(*args, **kwargs) 164 | 165 | 166 | def get_end(segments: List[dict]) -> Optional[float]: 167 | return next( 168 | (w["end"] for s in reversed(segments) for w in reversed(s["words"])), 169 | segments[-1]["end"] if segments else None, 170 | ) 171 | -------------------------------------------------------------------------------- /faster_whisper/vad.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | import functools 3 | import os 4 | 5 | from dataclasses import dataclass 6 | from typing import Dict, List, Optional, Tuple 7 | 8 | import numpy as np 9 | 10 | from faster_whisper.utils import get_assets_path 11 | 12 | 13 | # The code below is adapted from https://github.com/snakers4/silero-vad. 14 | @dataclass 15 | class VadOptions: 16 | """VAD options. 17 | 18 | Attributes: 19 | threshold: Speech threshold. Silero VAD outputs speech probabilities for each audio chunk, 20 | probabilities ABOVE this value are considered as SPEECH. It is better to tune this 21 | parameter for each dataset separately, but "lazy" 0.5 is pretty good for most datasets. 22 | neg_threshold: Silence threshold for determining the end of speech. 
If a probability is lower 23 | than neg_threshold, it is always considered silence. Values higher than neg_threshold 24 | are only considered speech if the previous sample was classified as speech; otherwise, 25 | they are treated as silence. This parameter helps refine the detection of speech 26 | transitions, ensuring smoother segment boundaries. 27 | min_speech_duration_ms: Final speech chunks shorter than min_speech_duration_ms are thrown out. 28 | max_speech_duration_s: Maximum duration of speech chunks in seconds. Chunks longer 29 | than max_speech_duration_s will be split at the timestamp of the last silence that 30 | lasts more than 100ms (if any), to prevent aggressive cutting. Otherwise, they will be 31 | split aggressively just before max_speech_duration_s. 32 | min_silence_duration_ms: At the end of each speech chunk, wait for min_silence_duration_ms 33 | before separating it. 34 | speech_pad_ms: Final speech chunks are padded by speech_pad_ms on each side. 35 | """ 36 | 37 | threshold: float = 0.5 38 | neg_threshold: Optional[float] = None 39 | min_speech_duration_ms: int = 0 40 | max_speech_duration_s: float = float("inf") 41 | min_silence_duration_ms: int = 2000 42 | speech_pad_ms: int = 400 43 | 44 | 45 | def get_speech_timestamps( 46 | audio: np.ndarray, 47 | vad_options: Optional[VadOptions] = None, 48 | sampling_rate: int = 16000, 49 | **kwargs, 50 | ) -> List[dict]: 51 | """This method is used for splitting long audio into speech chunks using Silero VAD. 52 | 53 | Args: 54 | audio: One dimensional float array. 55 | vad_options: Options for VAD processing. 56 | sampling_rate: Sampling rate of the audio. 57 | kwargs: VAD options passed as keyword arguments for backward compatibility. 58 | 59 | Returns: 60 | List of dicts containing begin and end samples of each speech chunk. 
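        Example:
            An illustrative sketch; it assumes a one-dimensional float32 waveform already
            resampled to 16 kHz (the zero-filled buffer below is only a placeholder).

                import numpy as np

                from faster_whisper.vad import VadOptions, get_speech_timestamps

                audio = np.zeros(16000, dtype=np.float32)  # placeholder; use real audio here
                chunks = get_speech_timestamps(
                    audio, VadOptions(min_silence_duration_ms=500, speech_pad_ms=200)
                )
                # Each entry is a dict with "start" and "end" expressed in samples.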
61 | """ 62 | if vad_options is None: 63 | vad_options = VadOptions(**kwargs) 64 | 65 | threshold = vad_options.threshold 66 | neg_threshold = vad_options.neg_threshold 67 | min_speech_duration_ms = vad_options.min_speech_duration_ms 68 | max_speech_duration_s = vad_options.max_speech_duration_s 69 | min_silence_duration_ms = vad_options.min_silence_duration_ms 70 | window_size_samples = 512 71 | speech_pad_ms = vad_options.speech_pad_ms 72 | min_speech_samples = sampling_rate * min_speech_duration_ms / 1000 73 | speech_pad_samples = sampling_rate * speech_pad_ms / 1000 74 | max_speech_samples = ( 75 | sampling_rate * max_speech_duration_s 76 | - window_size_samples 77 | - 2 * speech_pad_samples 78 | ) 79 | min_silence_samples = sampling_rate * min_silence_duration_ms / 1000 80 | min_silence_samples_at_max_speech = sampling_rate * 98 / 1000 81 | 82 | audio_length_samples = len(audio) 83 | 84 | model = get_vad_model() 85 | 86 | padded_audio = np.pad( 87 | audio, (0, window_size_samples - audio.shape[0] % window_size_samples) 88 | ) 89 | speech_probs = model(padded_audio.reshape(1, -1)).squeeze(0) 90 | 91 | triggered = False 92 | speeches = [] 93 | current_speech = {} 94 | if neg_threshold is None: 95 | neg_threshold = max(threshold - 0.15, 0.01) 96 | 97 | # to save potential segment end (and tolerate some silence) 98 | temp_end = 0 99 | # to save potential segment limits in case of maximum segment size reached 100 | prev_end = next_start = 0 101 | 102 | for i, speech_prob in enumerate(speech_probs): 103 | if (speech_prob >= threshold) and temp_end: 104 | temp_end = 0 105 | if next_start < prev_end: 106 | next_start = window_size_samples * i 107 | 108 | if (speech_prob >= threshold) and not triggered: 109 | triggered = True 110 | current_speech["start"] = window_size_samples * i 111 | continue 112 | 113 | if ( 114 | triggered 115 | and (window_size_samples * i) - current_speech["start"] > max_speech_samples 116 | ): 117 | if prev_end: 118 | current_speech["end"] = prev_end 119 | speeches.append(current_speech) 120 | current_speech = {} 121 | # previously reached silence (< neg_thres) and is still not speech (< thres) 122 | if next_start < prev_end: 123 | triggered = False 124 | else: 125 | current_speech["start"] = next_start 126 | prev_end = next_start = temp_end = 0 127 | else: 128 | current_speech["end"] = window_size_samples * i 129 | speeches.append(current_speech) 130 | current_speech = {} 131 | prev_end = next_start = temp_end = 0 132 | triggered = False 133 | continue 134 | 135 | if (speech_prob < neg_threshold) and triggered: 136 | if not temp_end: 137 | temp_end = window_size_samples * i 138 | # condition to avoid cutting in very short silence 139 | if (window_size_samples * i) - temp_end > min_silence_samples_at_max_speech: 140 | prev_end = temp_end 141 | if (window_size_samples * i) - temp_end < min_silence_samples: 142 | continue 143 | else: 144 | current_speech["end"] = temp_end 145 | if ( 146 | current_speech["end"] - current_speech["start"] 147 | ) > min_speech_samples: 148 | speeches.append(current_speech) 149 | current_speech = {} 150 | prev_end = next_start = temp_end = 0 151 | triggered = False 152 | continue 153 | 154 | if ( 155 | current_speech 156 | and (audio_length_samples - current_speech["start"]) > min_speech_samples 157 | ): 158 | current_speech["end"] = audio_length_samples 159 | speeches.append(current_speech) 160 | 161 | for i, speech in enumerate(speeches): 162 | if i == 0: 163 | speech["start"] = int(max(0, speech["start"] - speech_pad_samples)) 164 | if i 
!= len(speeches) - 1: 165 | silence_duration = speeches[i + 1]["start"] - speech["end"] 166 | if silence_duration < 2 * speech_pad_samples: 167 | speech["end"] += int(silence_duration // 2) 168 | speeches[i + 1]["start"] = int( 169 | max(0, speeches[i + 1]["start"] - silence_duration // 2) 170 | ) 171 | else: 172 | speech["end"] = int( 173 | min(audio_length_samples, speech["end"] + speech_pad_samples) 174 | ) 175 | speeches[i + 1]["start"] = int( 176 | max(0, speeches[i + 1]["start"] - speech_pad_samples) 177 | ) 178 | else: 179 | speech["end"] = int( 180 | min(audio_length_samples, speech["end"] + speech_pad_samples) 181 | ) 182 | 183 | return speeches 184 | 185 | 186 | def collect_chunks( 187 | audio: np.ndarray, chunks: List[dict], sampling_rate: int = 16000 188 | ) -> Tuple[List[np.ndarray], List[Dict[str, int]]]: 189 | """Collects audio chunks.""" 190 | if not chunks: 191 | chunk_metadata = { 192 | "start_time": 0, 193 | "end_time": 0, 194 | } 195 | return [np.array([], dtype=np.float32)], [chunk_metadata] 196 | 197 | audio_chunks = [] 198 | chunks_metadata = [] 199 | for chunk in chunks: 200 | chunk_metadata = { 201 | "start_time": chunk["start"] / sampling_rate, 202 | "end_time": chunk["end"] / sampling_rate, 203 | } 204 | audio_chunks.append(audio[chunk["start"] : chunk["end"]]) 205 | chunks_metadata.append(chunk_metadata) 206 | return audio_chunks, chunks_metadata 207 | 208 | 209 | class SpeechTimestampsMap: 210 | """Helper class to restore original speech timestamps.""" 211 | 212 | def __init__(self, chunks: List[dict], sampling_rate: int, time_precision: int = 2): 213 | self.sampling_rate = sampling_rate 214 | self.time_precision = time_precision 215 | self.chunk_end_sample = [] 216 | self.total_silence_before = [] 217 | 218 | previous_end = 0 219 | silent_samples = 0 220 | 221 | for chunk in chunks: 222 | silent_samples += chunk["start"] - previous_end 223 | previous_end = chunk["end"] 224 | 225 | self.chunk_end_sample.append(chunk["end"] - silent_samples) 226 | self.total_silence_before.append(silent_samples / sampling_rate) 227 | 228 | def get_original_time( 229 | self, 230 | time: float, 231 | chunk_index: Optional[int] = None, 232 | ) -> float: 233 | if chunk_index is None: 234 | chunk_index = self.get_chunk_index(time) 235 | 236 | total_silence_before = self.total_silence_before[chunk_index] 237 | return round(total_silence_before + time, self.time_precision) 238 | 239 | def get_chunk_index(self, time: float) -> int: 240 | sample = int(time * self.sampling_rate) 241 | return min( 242 | bisect.bisect(self.chunk_end_sample, sample), 243 | len(self.chunk_end_sample) - 1, 244 | ) 245 | 246 | 247 | @functools.lru_cache 248 | def get_vad_model(): 249 | """Returns the VAD model instance.""" 250 | encoder_path = os.path.join(get_assets_path(), "silero_encoder_v5.onnx") 251 | decoder_path = os.path.join(get_assets_path(), "silero_decoder_v5.onnx") 252 | return SileroVADModel(encoder_path, decoder_path) 253 | 254 | 255 | class SileroVADModel: 256 | def __init__(self, encoder_path, decoder_path): 257 | try: 258 | import onnxruntime 259 | except ImportError as e: 260 | raise RuntimeError( 261 | "Applying the VAD filter requires the onnxruntime package" 262 | ) from e 263 | 264 | opts = onnxruntime.SessionOptions() 265 | opts.inter_op_num_threads = 1 266 | opts.intra_op_num_threads = 1 267 | opts.enable_cpu_mem_arena = False 268 | opts.log_severity_level = 4 269 | 270 | self.encoder_session = onnxruntime.InferenceSession( 271 | encoder_path, 272 | providers=["CPUExecutionProvider"], 
273 | sess_options=opts, 274 | ) 275 | self.decoder_session = onnxruntime.InferenceSession( 276 | decoder_path, 277 | providers=["CPUExecutionProvider"], 278 | sess_options=opts, 279 | ) 280 | 281 | def __call__( 282 | self, audio: np.ndarray, num_samples: int = 512, context_size_samples: int = 64 283 | ): 284 | assert ( 285 | audio.ndim == 2 286 | ), "Input should be a 2D array with size (batch_size, num_samples)" 287 | assert ( 288 | audio.shape[1] % num_samples == 0 289 | ), "Input size should be a multiple of num_samples" 290 | 291 | batch_size = audio.shape[0] 292 | 293 | state = np.zeros((2, batch_size, 128), dtype="float32") 294 | context = np.zeros( 295 | (batch_size, context_size_samples), 296 | dtype="float32", 297 | ) 298 | 299 | batched_audio = audio.reshape(batch_size, -1, num_samples) 300 | context = batched_audio[..., -context_size_samples:] 301 | context[:, -1] = 0 302 | context = np.roll(context, 1, 1) 303 | batched_audio = np.concatenate([context, batched_audio], 2) 304 | 305 | batched_audio = batched_audio.reshape(-1, num_samples + context_size_samples) 306 | 307 | encoder_batch_size = 10000 308 | num_segments = batched_audio.shape[0] 309 | encoder_outputs = [] 310 | for i in range(0, num_segments, encoder_batch_size): 311 | encoder_output = self.encoder_session.run( 312 | None, {"input": batched_audio[i : i + encoder_batch_size]} 313 | )[0] 314 | encoder_outputs.append(encoder_output) 315 | 316 | encoder_output = np.concatenate(encoder_outputs, axis=0) 317 | encoder_output = encoder_output.reshape(batch_size, -1, 128) 318 | 319 | decoder_outputs = [] 320 | for window in np.split(encoder_output, encoder_output.shape[1], axis=1): 321 | out, state = self.decoder_session.run( 322 | None, {"input": window.squeeze(1), "state": state} 323 | ) 324 | decoder_outputs.append(out) 325 | 326 | out = np.stack(decoder_outputs, axis=1).squeeze(-1) 327 | return out 328 | 329 | 330 | def merge_segments(segments_list, vad_options: VadOptions, sampling_rate: int = 16000): 331 | if not segments_list: 332 | return [] 333 | 334 | curr_end = 0 335 | seg_idxs = [] 336 | merged_segments = [] 337 | edge_padding = vad_options.speech_pad_ms * sampling_rate // 1000 338 | chunk_length = vad_options.max_speech_duration_s * sampling_rate 339 | 340 | curr_start = segments_list[0]["start"] 341 | 342 | for idx, seg in enumerate(segments_list): 343 | # if any segment start timing is less than previous segment end timing, 344 | # reset the edge padding. Similarly for end timing. 
345 | if idx > 0: 346 | if seg["start"] < segments_list[idx - 1]["end"]: 347 | seg["start"] += edge_padding 348 | if idx < len(segments_list) - 1: 349 | if seg["end"] > segments_list[idx + 1]["start"]: 350 | seg["end"] -= edge_padding 351 | 352 | if seg["end"] - curr_start > chunk_length and curr_end - curr_start > 0: 353 | merged_segments.append( 354 | { 355 | "start": curr_start, 356 | "end": curr_end, 357 | "segments": seg_idxs, 358 | } 359 | ) 360 | curr_start = seg["start"] 361 | seg_idxs = [] 362 | curr_end = seg["end"] 363 | seg_idxs.append((seg["start"], seg["end"])) 364 | # add final 365 | merged_segments.append( 366 | { 367 | "start": curr_start, 368 | "end": curr_end, 369 | "segments": seg_idxs, 370 | } 371 | ) 372 | return merged_segments 373 | -------------------------------------------------------------------------------- /faster_whisper/version.py: -------------------------------------------------------------------------------- 1 | """Version information.""" 2 | 3 | __version__ = "1.1.1" 4 | -------------------------------------------------------------------------------- /requirements.conversion.txt: -------------------------------------------------------------------------------- 1 | transformers[torch]>=4.23 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ctranslate2>=4.0,<5 2 | huggingface_hub>=0.13 3 | tokenizers>=0.13,<1 4 | onnxruntime>=1.14,<2 5 | av>=11 6 | tqdm -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | ignore = 4 | E203, 5 | W503, 6 | 7 | [isort] 8 | profile=black 9 | lines_between_types=1 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import find_packages, setup 4 | 5 | base_dir = os.path.dirname(os.path.abspath(__file__)) 6 | 7 | 8 | def get_long_description(): 9 | readme_path = os.path.join(base_dir, "README.md") 10 | with open(readme_path, encoding="utf-8") as readme_file: 11 | return readme_file.read() 12 | 13 | 14 | def get_project_version(): 15 | version_path = os.path.join(base_dir, "faster_whisper", "version.py") 16 | version = {} 17 | with open(version_path, encoding="utf-8") as fp: 18 | exec(fp.read(), version) 19 | return version["__version__"] 20 | 21 | 22 | def get_requirements(path): 23 | with open(path, encoding="utf-8") as requirements: 24 | return [requirement.strip() for requirement in requirements] 25 | 26 | 27 | install_requires = get_requirements(os.path.join(base_dir, "requirements.txt")) 28 | conversion_requires = get_requirements( 29 | os.path.join(base_dir, "requirements.conversion.txt") 30 | ) 31 | 32 | setup( 33 | name="faster-whisper", 34 | version=get_project_version(), 35 | license="MIT", 36 | description="Faster Whisper transcription with CTranslate2", 37 | long_description=get_long_description(), 38 | long_description_content_type="text/markdown", 39 | author="Guillaume Klein", 40 | url="https://github.com/SYSTRAN/faster-whisper", 41 | classifiers=[ 42 | "Development Status :: 4 - Beta", 43 | "Intended Audience :: Developers", 44 | "Intended Audience :: Science/Research", 45 | "License :: OSI Approved :: MIT License", 46 | "Programming Language :: Python :: 
3", 47 | "Programming Language :: Python :: 3 :: Only", 48 | "Programming Language :: Python :: 3.9", 49 | "Programming Language :: Python :: 3.10", 50 | "Programming Language :: Python :: 3.11", 51 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 52 | ], 53 | keywords="openai whisper speech ctranslate2 inference quantization transformer", 54 | python_requires=">=3.9", 55 | install_requires=install_requires, 56 | extras_require={ 57 | "conversion": conversion_requires, 58 | "dev": [ 59 | "black==23.*", 60 | "flake8==6.*", 61 | "isort==5.*", 62 | "pytest==7.*", 63 | ], 64 | }, 65 | packages=find_packages(), 66 | include_package_data=True, 67 | ) 68 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | 6 | @pytest.fixture 7 | def data_dir(): 8 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") 9 | 10 | 11 | @pytest.fixture 12 | def jfk_path(data_dir): 13 | return os.path.join(data_dir, "jfk.flac") 14 | 15 | 16 | @pytest.fixture 17 | def physcisworks_path(data_dir): 18 | return os.path.join(data_dir, "physicsworks.wav") 19 | -------------------------------------------------------------------------------- /tests/data/hotwords.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/hotwords.mp3 -------------------------------------------------------------------------------- /tests/data/jfk.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/jfk.flac -------------------------------------------------------------------------------- /tests/data/multilingual.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/multilingual.mp3 -------------------------------------------------------------------------------- /tests/data/physicsworks.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/physicsworks.wav -------------------------------------------------------------------------------- /tests/data/stereo_diarization.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/stereo_diarization.wav -------------------------------------------------------------------------------- /tests/test_tokenizer.py: -------------------------------------------------------------------------------- 1 | from faster_whisper import WhisperModel 2 | from faster_whisper.tokenizer import Tokenizer 3 | from faster_whisper.transcribe import get_suppressed_tokens 4 | 5 | 6 | def test_suppressed_tokens_minus_1(): 7 | model = WhisperModel("tiny.en") 8 | 9 | tokenizer = Tokenizer(model.hf_tokenizer, False) 10 | tokens = get_suppressed_tokens(tokenizer, [-1]) 11 | assert tokens == ( 12 | 1, 13 | 2, 14 | 7, 15 | 8, 16 | 9, 17 | 10, 18 | 14, 19 | 25, 20 | 26, 21 | 27, 22 | 28, 23 | 29, 24 | 31, 25 | 58, 26 | 59, 27 | 60, 28 | 61, 29 | 62, 30 
| 63, 31 | 90, 32 | 91, 33 | 92, 34 | 93, 35 | 357, 36 | 366, 37 | 438, 38 | 532, 39 | 685, 40 | 705, 41 | 796, 42 | 930, 43 | 1058, 44 | 1220, 45 | 1267, 46 | 1279, 47 | 1303, 48 | 1343, 49 | 1377, 50 | 1391, 51 | 1635, 52 | 1782, 53 | 1875, 54 | 2162, 55 | 2361, 56 | 2488, 57 | 3467, 58 | 4008, 59 | 4211, 60 | 4600, 61 | 4808, 62 | 5299, 63 | 5855, 64 | 6329, 65 | 7203, 66 | 9609, 67 | 9959, 68 | 10563, 69 | 10786, 70 | 11420, 71 | 11709, 72 | 11907, 73 | 13163, 74 | 13697, 75 | 13700, 76 | 14808, 77 | 15306, 78 | 16410, 79 | 16791, 80 | 17992, 81 | 19203, 82 | 19510, 83 | 20724, 84 | 22305, 85 | 22935, 86 | 27007, 87 | 30109, 88 | 30420, 89 | 33409, 90 | 34949, 91 | 40283, 92 | 40493, 93 | 40549, 94 | 47282, 95 | 49146, 96 | 50257, 97 | 50357, 98 | 50358, 99 | 50359, 100 | 50360, 101 | ) 102 | 103 | 104 | def test_suppressed_tokens_minus_value(): 105 | model = WhisperModel("tiny.en") 106 | 107 | tokenizer = Tokenizer(model.hf_tokenizer, False) 108 | tokens = get_suppressed_tokens(tokenizer, [13]) 109 | assert tokens == (13, 50257, 50357, 50358, 50359, 50360) 110 | 111 | 112 | def test_split_on_unicode(): 113 | model = WhisperModel("tiny") 114 | tokenizer = Tokenizer(model.hf_tokenizer, False) 115 | 116 | tokens = [8404, 871, 287, 6, 246, 526, 3210, 20378] 117 | words, word_tokens = tokenizer.split_tokens_on_unicode(tokens) 118 | 119 | assert words == [" elle", " est", " l", "'", "\ufffd", "é", "rit", "oire"] 120 | assert word_tokens == [[8404], [871], [287], [6], [246], [526], [3210], [20378]] 121 | -------------------------------------------------------------------------------- /tests/test_transcribe.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | 4 | import numpy as np 5 | 6 | from faster_whisper import BatchedInferencePipeline, WhisperModel, decode_audio 7 | 8 | 9 | def test_supported_languages(): 10 | model = WhisperModel("tiny.en") 11 | assert model.supported_languages == ["en"] 12 | 13 | 14 | def test_transcribe(jfk_path): 15 | model = WhisperModel("tiny") 16 | segments, info = model.transcribe(jfk_path, word_timestamps=True) 17 | assert info.all_language_probs is not None 18 | 19 | assert info.language == "en" 20 | assert info.language_probability > 0.9 21 | assert info.duration == 11 22 | 23 | # Get top language info from all results, which should match the 24 | # already existing metadata 25 | top_lang, top_lang_score = info.all_language_probs[0] 26 | assert info.language == top_lang 27 | assert abs(info.language_probability - top_lang_score) < 1e-16 28 | 29 | segments = list(segments) 30 | 31 | assert len(segments) == 1 32 | 33 | segment = segments[0] 34 | 35 | assert segment.text == ( 36 | " And so my fellow Americans, ask not what your country can do for you, " 37 | "ask what you can do for your country." 
38 | ) 39 | 40 | assert segment.text == "".join(word.word for word in segment.words) 41 | assert segment.start == segment.words[0].start 42 | assert segment.end == segment.words[-1].end 43 | batched_model = BatchedInferencePipeline(model=model) 44 | result, info = batched_model.transcribe( 45 | jfk_path, word_timestamps=True, vad_filter=False 46 | ) 47 | assert info.language == "en" 48 | assert info.language_probability > 0.7 49 | segments = [] 50 | for segment in result: 51 | segments.append( 52 | {"start": segment.start, "end": segment.end, "text": segment.text} 53 | ) 54 | 55 | assert len(segments) == 1 56 | assert segment.text == ( 57 | " And so my fellow Americans ask not what your country can do for you, " 58 | "ask what you can do for your country." 59 | ) 60 | 61 | 62 | def test_batched_transcribe(physcisworks_path): 63 | model = WhisperModel("tiny") 64 | batched_model = BatchedInferencePipeline(model=model) 65 | result, info = batched_model.transcribe(physcisworks_path, batch_size=16) 66 | assert info.language == "en" 67 | assert info.language_probability > 0.7 68 | segments = [] 69 | for segment in result: 70 | segments.append( 71 | {"start": segment.start, "end": segment.end, "text": segment.text} 72 | ) 73 | # number of near 30 sec segments 74 | assert len(segments) == 7 75 | 76 | result, info = batched_model.transcribe( 77 | physcisworks_path, 78 | batch_size=16, 79 | without_timestamps=False, 80 | word_timestamps=True, 81 | ) 82 | segments = [] 83 | for segment in result: 84 | assert segment.words is not None 85 | segments.append( 86 | {"start": segment.start, "end": segment.end, "text": segment.text} 87 | ) 88 | assert len(segments) > 7 89 | 90 | 91 | def test_empty_audio(): 92 | audio = np.asarray([], dtype="float32") 93 | model = WhisperModel("tiny") 94 | pipeline = BatchedInferencePipeline(model=model) 95 | assert list(model.transcribe(audio)[0]) == [] 96 | assert list(pipeline.transcribe(audio)[0]) == [] 97 | model.detect_language(audio) 98 | 99 | 100 | def test_prefix_with_timestamps(jfk_path): 101 | model = WhisperModel("tiny") 102 | segments, _ = model.transcribe(jfk_path, prefix="And so my fellow Americans") 103 | segments = list(segments) 104 | 105 | assert len(segments) == 1 106 | 107 | segment = segments[0] 108 | 109 | assert segment.text == ( 110 | " And so my fellow Americans, ask not what your country can do for you, " 111 | "ask what you can do for your country." 112 | ) 113 | 114 | assert segment.start == 0 115 | assert 10 < segment.end <= 11 116 | 117 | 118 | def test_vad(jfk_path): 119 | model = WhisperModel("tiny") 120 | segments, info = model.transcribe( 121 | jfk_path, 122 | vad_filter=True, 123 | vad_parameters=dict(min_silence_duration_ms=500, speech_pad_ms=200), 124 | ) 125 | segments = list(segments) 126 | 127 | assert len(segments) == 1 128 | segment = segments[0] 129 | 130 | assert segment.text == ( 131 | " And so my fellow Americans ask not what your country can do for you, " 132 | "ask what you can do for your country." 
133 | ) 134 | 135 | assert 0 < segment.start < 1 136 | assert 10 < segment.end < 11 137 | 138 | assert info.vad_options.min_silence_duration_ms == 500 139 | assert info.vad_options.speech_pad_ms == 200 140 | 141 | 142 | def test_stereo_diarization(data_dir): 143 | model = WhisperModel("tiny") 144 | 145 | audio_path = os.path.join(data_dir, "stereo_diarization.wav") 146 | left, right = decode_audio(audio_path, split_stereo=True) 147 | 148 | segments, _ = model.transcribe(left) 149 | transcription = "".join(segment.text for segment in segments).strip() 150 | assert transcription == ( 151 | "He began a confused complaint against the wizard, " 152 | "who had vanished behind the curtain on the left." 153 | ) 154 | 155 | segments, _ = model.transcribe(right) 156 | transcription = "".join(segment.text for segment in segments).strip() 157 | assert transcription == "The horizon seems extremely distant." 158 | 159 | 160 | def test_multilingual_transcription(data_dir): 161 | model = WhisperModel("tiny") 162 | pipeline = BatchedInferencePipeline(model) 163 | 164 | audio_path = os.path.join(data_dir, "multilingual.mp3") 165 | audio = decode_audio(audio_path) 166 | 167 | segments, info = model.transcribe( 168 | audio, 169 | multilingual=True, 170 | without_timestamps=True, 171 | condition_on_previous_text=False, 172 | ) 173 | segments = list(segments) 174 | 175 | assert ( 176 | segments[0].text 177 | == " Permission is hereby granted, free of charge, to any person obtaining a copy of the" 178 | " software and associated documentation files to deal in the software without restriction," 179 | " including without limitation the rights to use, copy, modify, merge, publish, distribute" 180 | ", sublicence, and or cell copies of the software, and to permit persons to whom the " 181 | "software is furnished to do so, subject to the following conditions. The above copyright" 182 | " notice and this permission notice, shall be included in all copies or substantial " 183 | "portions of the software." 184 | ) 185 | 186 | assert ( 187 | segments[1].text 188 | == " Jedem, der dieses Software und die dazu gehöregen Dokumentationsdatein erhält, wird " 189 | "hiermit unengeltlich die Genehmigung erteilt, wird der Software und eingeschränkt zu " 190 | "verfahren. Dies umfasst insbesondere das Recht, die Software zu verwenden, zu " 191 | "vervielfältigen, zu modifizieren, zu Samenzofügen, zu veröffentlichen, zu verteilen, " 192 | "unterzulizenzieren und oder kopieren der Software zu verkaufen und diese Rechte " 193 | "unterfolgen den Bedingungen anderen zu übertragen." 194 | ) 195 | 196 | segments, info = pipeline.transcribe(audio, multilingual=True) 197 | segments = list(segments) 198 | 199 | assert ( 200 | segments[0].text 201 | == " Permission is hereby granted, free of charge, to any person obtaining a copy of the" 202 | " software and associated documentation files to deal in the software without restriction," 203 | " including without limitation the rights to use, copy, modify, merge, publish, distribute" 204 | ", sublicence, and or cell copies of the software, and to permit persons to whom the " 205 | "software is furnished to do so, subject to the following conditions. The above copyright" 206 | " notice and this permission notice, shall be included in all copies or substantial " 207 | "portions of the software." 208 | ) 209 | assert ( 210 | "Dokumentationsdatein erhält, wird hiermit unengeltlich die Genehmigung erteilt," 211 | " wird der Software und eingeschränkt zu verfahren. 
Dies umfasst insbesondere das Recht," 212 | " die Software zu verwenden, zu vervielfältigen, zu modifizieren" 213 | in segments[1].text 214 | ) 215 | 216 | 217 | def test_hotwords(data_dir): 218 | model = WhisperModel("tiny") 219 | pipeline = BatchedInferencePipeline(model) 220 | 221 | audio_path = os.path.join(data_dir, "hotwords.mp3") 222 | audio = decode_audio(audio_path) 223 | 224 | segments, info = model.transcribe(audio, hotwords="ComfyUI") 225 | segments = list(segments) 226 | 227 | assert "ComfyUI" in segments[0].text 228 | assert info.transcription_options.hotwords == "ComfyUI" 229 | 230 | segments, info = pipeline.transcribe(audio, hotwords="ComfyUI") 231 | segments = list(segments) 232 | 233 | assert "ComfyUI" in segments[0].text 234 | assert info.transcription_options.hotwords == "ComfyUI" 235 | 236 | 237 | def test_transcribe_signature(): 238 | model_transcribe_args = set(inspect.getargs(WhisperModel.transcribe.__code__).args) 239 | pipeline_transcribe_args = set( 240 | inspect.getargs(BatchedInferencePipeline.transcribe.__code__).args 241 | ) 242 | pipeline_transcribe_args.remove("batch_size") 243 | 244 | assert model_transcribe_args == pipeline_transcribe_args 245 | 246 | 247 | def test_monotonic_timestamps(physcisworks_path): 248 | model = WhisperModel("tiny") 249 | pipeline = BatchedInferencePipeline(model=model) 250 | 251 | segments, info = model.transcribe(physcisworks_path, word_timestamps=True) 252 | segments = list(segments) 253 | 254 | for i in range(len(segments) - 1): 255 | assert segments[i].start <= segments[i].end 256 | assert segments[i].end <= segments[i + 1].start 257 | for word in segments[i].words: 258 | assert word.start <= word.end 259 | assert word.end <= segments[i].end 260 | assert segments[-1].end <= info.duration 261 | 262 | segments, info = pipeline.transcribe(physcisworks_path, word_timestamps=True) 263 | segments = list(segments) 264 | 265 | for i in range(len(segments) - 1): 266 | assert segments[i].start <= segments[i].end 267 | assert segments[i].end <= segments[i + 1].start 268 | for word in segments[i].words: 269 | assert word.start <= word.end 270 | assert word.end <= segments[i].end 271 | assert segments[-1].end <= info.duration 272 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from faster_whisper import available_models, download_model 4 | 5 | 6 | def test_available_models(): 7 | models = available_models() 8 | assert isinstance(models, list) 9 | assert "tiny" in models 10 | 11 | 12 | def test_download_model(tmpdir): 13 | output_dir = str(tmpdir.join("model")) 14 | 15 | model_dir = download_model("tiny", output_dir=output_dir) 16 | 17 | assert model_dir == output_dir 18 | assert os.path.isdir(model_dir) 19 | assert not os.path.islink(model_dir) 20 | 21 | for filename in os.listdir(model_dir): 22 | path = os.path.join(model_dir, filename) 23 | assert not os.path.islink(path) 24 | 25 | 26 | def test_download_model_in_cache(tmpdir): 27 | cache_dir = str(tmpdir.join("model")) 28 | download_model("tiny", cache_dir=cache_dir) 29 | assert os.path.isdir(cache_dir) 30 | --------------------------------------------------------------------------------
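For quick reference, here is a minimal, illustrative sketch of the transcription API exercised by the tests above. It assumes the "tiny" model can be downloaded (or is already cached) and that an audio file such as tests/data/jfk.flac is available; the printed output format is arbitrary.

```python
from faster_whisper import WhisperModel

# Load a small CTranslate2-converted Whisper model
# (fetched from the Hugging Face Hub on first use, then cached locally).
model = WhisperModel("tiny")

# transcribe() returns an iterable of segments plus an info object carrying
# language metadata such as info.language and info.duration.
segments, info = model.transcribe("tests/data/jfk.flac", word_timestamps=True)

print(info.language, info.language_probability)
for segment in segments:
    print(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
```

The batched path shown in the tests wraps the same model: BatchedInferencePipeline(model=model) exposes the same transcribe() signature with an additional batch_size argument.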