├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── benchmark ├── benchmark.m4a ├── evaluate_yt_commons.py ├── memory_benchmark.py ├── normalizer.json ├── requirements.benchmark.txt ├── speed_benchmark.py ├── utils.py └── wer_benchmark.py ├── docker ├── Dockerfile ├── infer.py └── jfk.flac ├── faster_whisper ├── __init__.py ├── assets │ ├── __init__.py │ ├── silero_decoder_v5.onnx │ └── silero_encoder_v5.onnx ├── audio.py ├── feature_extractor.py ├── tokenizer.py ├── transcribe.py ├── utils.py ├── vad.py └── version.py ├── requirements.conversion.txt ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── conftest.py ├── data ├── hotwords.mp3 ├── jfk.flac ├── multilingual.mp3 ├── physicsworks.wav └── stereo_diarization.wav ├── test_tokenizer.py ├── test_transcribe.py └── test_utils.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | tags: 8 | - v* 9 | pull_request: 10 | branches: 11 | - master 12 | 13 | jobs: 14 | check-code-format: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Set up Python 3.9 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: 3.9 24 | 25 | - name: Install module 26 | run: | 27 | pip install wheel 28 | pip install -e .[dev] 29 | 30 | - name: Check code format with Black 31 | run: | 32 | black --check . 33 | 34 | - name: Check imports order with isort 35 | run: | 36 | isort --check-only . 37 | 38 | - name: Check code style with Flake8 39 | if: ${{ always() }} 40 | run: | 41 | flake8 . 42 | 43 | 44 | run-tests: 45 | runs-on: ubuntu-latest 46 | 47 | steps: 48 | - uses: actions/checkout@v4 49 | 50 | - name: Set up Python 3.9 51 | uses: actions/setup-python@v5 52 | with: 53 | python-version: 3.9 54 | 55 | - name: Install module 56 | run: | 57 | pip install wheel 58 | pip install -e .[dev] 59 | 60 | - name: Run pytest 61 | run: | 62 | pytest -v tests/ 63 | 64 | 65 | build-and-push-package: 66 | runs-on: ubuntu-latest 67 | needs: [check-code-format, run-tests] 68 | 69 | steps: 70 | - uses: actions/checkout@v4 71 | 72 | - name: Set up Python 3.9 73 | uses: actions/setup-python@v5 74 | with: 75 | python-version: 3.9 76 | 77 | - name: Install dependencies 78 | run: | 79 | pip install wheel 80 | 81 | - name: Build package 82 | run: | 83 | python3 setup.py sdist bdist_wheel 84 | 85 | - name: Push package on PyPI 86 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 87 | uses: pypa/gh-action-pypi-publish@release/v1 88 | with: 89 | user: __token__ 90 | password: ${{ secrets.PYPI_API_TOKEN }} 91 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / Optimized / DLL Files 2 | *.pyc 3 | *.pyo 4 | *.pyd 5 | __pycache__/ 6 | 7 | # Distribution / Packaging 8 | venv/ 9 | 10 | # Unit Test 11 | .pytest_cache/ 12 | 13 | # Ignore IDE, Editor Files 14 | .idea/ 15 | .vscode/ 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to faster-whisper 2 | 3 | Contributions are welcome! Here are some pointers to help you install the library for development and validate your changes before submitting a pull request. 
4 | 5 | ## Install the library for development 6 | 7 | We recommend installing the module in editable mode with the `dev` extra requirements: 8 | 9 | ```bash 10 | git clone https://github.com/SYSTRAN/faster-whisper.git 11 | cd faster-whisper/ 12 | pip install -e .[dev] 13 | ``` 14 | 15 | ## Validate the changes before creating a pull request 16 | 17 | 1. Make sure the existing tests are still passing (and consider adding new tests as well!): 18 | 19 | ```bash 20 | pytest tests/ 21 | ``` 22 | 23 | 2. Reformat and validate the code with the following tools: 24 | 25 | ```bash 26 | black . 27 | isort . 28 | flake8 . 29 | ``` 30 | 31 | These steps are also run automatically in the CI when you open the pull request. 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 SYSTRAN 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include faster_whisper/assets/silero_encoder_v5.onnx 2 | include faster_whisper/assets/silero_decoder_v5.onnx 3 | include requirements.txt 4 | include requirements.conversion.txt 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CI](https://github.com/SYSTRAN/faster-whisper/workflows/CI/badge.svg)](https://github.com/SYSTRAN/faster-whisper/actions?query=workflow%3ACI) [![PyPI version](https://badge.fury.io/py/faster-whisper.svg)](https://badge.fury.io/py/faster-whisper) 2 | 3 | # Faster Whisper transcription with CTranslate2 4 | 5 | **faster-whisper** is a reimplementation of OpenAI's Whisper model using [CTranslate2](https://github.com/OpenNMT/CTranslate2/), which is a fast inference engine for Transformer models. 6 | 7 | This implementation is up to 4 times faster than [openai/whisper](https://github.com/openai/whisper) for the same accuracy while using less memory. The efficiency can be further improved with 8-bit quantization on both CPU and GPU. 
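As a quick illustration of the quantization point above, 8-bit inference is selected through the `compute_type` argument of `WhisperModel`; a minimal sketch using a placeholder `audio.mp3` (the full usage examples follow in the Usage section below):

```python
from faster_whisper import WhisperModel

# INT8 on CPU; on GPU, compute_type="int8_float16" stores 8-bit weights
# while keeping the computation in FP16.
model = WhisperModel("large-v3", device="cpu", compute_type="int8")

segments, info = model.transcribe("audio.mp3", beam_size=5)
for segment in segments:
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
```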
8 | 9 | ## Benchmark 10 | 11 | ### Whisper 12 | 13 | For reference, here are the time and memory usage required to transcribe [**13 minutes**](https://www.youtube.com/watch?v=0u7tTptBo9I) of audio using different implementations: 14 | 15 | * [openai/whisper](https://github.com/openai/whisper)@[v20240930](https://github.com/openai/whisper/tree/v20240930) 16 | * [whisper.cpp](https://github.com/ggerganov/whisper.cpp)@[v1.7.2](https://github.com/ggerganov/whisper.cpp/tree/v1.7.2) 17 | * [transformers](https://github.com/huggingface/transformers)@[v4.46.3](https://github.com/huggingface/transformers/tree/v4.46.3) 18 | * [faster-whisper](https://github.com/SYSTRAN/faster-whisper)@[v1.1.0](https://github.com/SYSTRAN/faster-whisper/tree/v1.1.0) 19 | 20 | ### Large-v2 model on GPU 21 | 22 | | Implementation | Precision | Beam size | Time | VRAM Usage | 23 | | --- | --- | --- | --- | --- | 24 | | openai/whisper | fp16 | 5 | 2m23s | 4708MB | 25 | | whisper.cpp (Flash Attention) | fp16 | 5 | 1m05s | 4127MB | 26 | | transformers (SDPA)[^1] | fp16 | 5 | 1m52s | 4960MB | 27 | | faster-whisper | fp16 | 5 | 1m03s | 4525MB | 28 | | faster-whisper (`batch_size=8`) | fp16 | 5 | 17s | 6090MB | 29 | | faster-whisper | int8 | 5 | 59s | 2926MB | 30 | | faster-whisper (`batch_size=8`) | int8 | 5 | 16s | 4500MB | 31 | 32 | ### distil-whisper-large-v3 model on GPU 33 | 34 | | Implementation | Precision | Beam size | Time | YT Commons WER | 35 | | --- | --- | --- | --- | --- | 36 | | transformers (SDPA) (`batch_size=16`) | fp16 | 5 | 46m12s | 14.801 | 37 | | faster-whisper (`batch_size=16`) | fp16 | 5 | 25m50s | 13.527 | 38 | 39 | *GPU benchmarks were executed with CUDA 12.4 on an NVIDIA RTX 3070 Ti 8GB.* 40 | [^1]: transformers OOM for any batch size > 1 41 | 42 | ### Small model on CPU 43 | 44 | | Implementation | Precision | Beam size | Time | RAM Usage | 45 | | --- | --- | --- | --- | --- | 46 | | openai/whisper | fp32 | 5 | 6m58s | 2335MB | 47 | | whisper.cpp | fp32 | 5 | 2m05s | 1049MB | 48 | | whisper.cpp (OpenVINO) | fp32 | 5 | 1m45s | 1642MB | 49 | | faster-whisper | fp32 | 5 | 2m37s | 2257MB | 50 | | faster-whisper (`batch_size=8`) | fp32 | 5 | 1m06s | 4230MB | 51 | | faster-whisper | int8 | 5 | 1m42s | 1477MB | 52 | | faster-whisper (`batch_size=8`) | int8 | 5 | 51s | 3608MB | 53 | 54 | *Executed with 8 threads on an Intel Core i7-12700K.* 55 | 56 | 57 | ## Requirements 58 | 59 | * Python 3.9 or greater 60 | 61 | Unlike openai-whisper, FFmpeg does **not** need to be installed on the system. The audio is decoded with the Python library [PyAV](https://github.com/PyAV-Org/PyAV), which bundles the FFmpeg libraries in its package. 62 | 63 | ### GPU 64 | 65 | GPU execution requires the following NVIDIA libraries to be installed: 66 | 67 | * [cuBLAS for CUDA 12](https://developer.nvidia.com/cublas) 68 | * [cuDNN 9 for CUDA 12](https://developer.nvidia.com/cudnn) 69 | 70 | **Note**: The latest versions of `ctranslate2` only support CUDA 12 and cuDNN 9. For CUDA 11 and cuDNN 8, the current workaround is downgrading to version `3.24.0` of `ctranslate2`; for CUDA 12 and cuDNN 8, downgrade to version `4.4.0` of `ctranslate2`. This can be done with `pip install --force-reinstall ctranslate2==4.4.0` (or the corresponding version) or by pinning the version in a `requirements.txt`. 71 | 72 | There are multiple ways to install the NVIDIA libraries mentioned above. The recommended way is described in the official NVIDIA documentation, but we also suggest other installation methods below. 73 | 74 |
75 | Other installation methods (click to expand) 76 | 77 | 78 | **Note:** For all of the methods below, keep in mind the note above regarding CUDA versions. Depending on your setup, you may need to install the _CUDA 11_ versions of the libraries that correspond to the CUDA 12 libraries listed in the instructions below. 79 | 80 | #### Use Docker 81 | 82 | The libraries (cuBLAS, cuDNN) are installed in the official NVIDIA CUDA Docker image: `nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04`. 83 | 84 | #### Install with `pip` (Linux only) 85 | 86 | On Linux, these libraries can be installed with `pip`. Note that `LD_LIBRARY_PATH` must be set before launching Python. 87 | 88 | ```bash 89 | pip install nvidia-cublas-cu12 nvidia-cudnn-cu12==9.* 90 | 91 | export LD_LIBRARY_PATH=`python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'` 92 | ``` 93 | 94 | #### Download the libraries from Purfview's repository (Windows & Linux) 95 | 96 | Purfview's [whisper-standalone-win](https://github.com/Purfview/whisper-standalone-win) provides the required NVIDIA libraries for Windows & Linux in a [single archive](https://github.com/Purfview/whisper-standalone-win/releases/tag/libs). Decompress the archive and place the libraries in a directory included in the `PATH`. 97 | 98 |
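If you are stuck on one of the older CUDA/cuDNN combinations covered by the note in the GPU requirements above, the `ctranslate2` downgrade can be pinned explicitly; a minimal sketch using the versions from that note (pick the command matching your setup):

```bash
# CUDA 12 with cuDNN 8: stay on ctranslate2 4.4.0
pip install --force-reinstall ctranslate2==4.4.0

# CUDA 11 with cuDNN 8: downgrade further to ctranslate2 3.24.0
pip install --force-reinstall ctranslate2==3.24.0
```

The same pin can also be recorded in a `requirements.txt` so it survives later reinstalls.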
99 | 100 | ## Installation 101 | 102 | The module can be installed from [PyPI](https://pypi.org/project/faster-whisper/): 103 | 104 | ```bash 105 | pip install faster-whisper 106 | ``` 107 | 108 |
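After installing, a one-liner import is a quick way to confirm the package is usable (an informal check, not part of the project's documented workflow):

```bash
python3 -c "from faster_whisper import WhisperModel; print('faster-whisper is installed')"
```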
109 | Other installation methods (click to expand) 110 | 111 | ### Install the master branch 112 | 113 | ```bash 114 | pip install --force-reinstall "faster-whisper @ https://github.com/SYSTRAN/faster-whisper/archive/refs/heads/master.tar.gz" 115 | ``` 116 | 117 | ### Install a specific commit 118 | 119 | ```bash 120 | pip install --force-reinstall "faster-whisper @ https://github.com/SYSTRAN/faster-whisper/archive/a4f1cc8f11433e454c3934442b5e1a4ed5e865c3.tar.gz" 121 | ``` 122 | 123 |
124 | 125 | ## Usage 126 | 127 | ### Faster-whisper 128 | 129 | ```python 130 | from faster_whisper import WhisperModel 131 | 132 | model_size = "large-v3" 133 | 134 | # Run on GPU with FP16 135 | model = WhisperModel(model_size, device="cuda", compute_type="float16") 136 | 137 | # or run on GPU with INT8 138 | # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16") 139 | # or run on CPU with INT8 140 | # model = WhisperModel(model_size, device="cpu", compute_type="int8") 141 | 142 | segments, info = model.transcribe("audio.mp3", beam_size=5) 143 | 144 | print("Detected language '%s' with probability %f" % (info.language, info.language_probability)) 145 | 146 | for segment in segments: 147 | print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) 148 | ``` 149 | 150 | **Warning:** `segments` is a *generator* so the transcription only starts when you iterate over it. The transcription can be run to completion by gathering the segments in a list or a `for` loop: 151 | 152 | ```python 153 | segments, _ = model.transcribe("audio.mp3") 154 | segments = list(segments) # The transcription will actually run here. 155 | ``` 156 | 157 | ### Batched Transcription 158 | The following code snippet illustrates how to run batched transcription on an example audio file. `BatchedInferencePipeline.transcribe` is a drop-in replacement for `WhisperModel.transcribe` 159 | 160 | ```python 161 | from faster_whisper import WhisperModel, BatchedInferencePipeline 162 | 163 | model = WhisperModel("turbo", device="cuda", compute_type="float16") 164 | batched_model = BatchedInferencePipeline(model=model) 165 | segments, info = batched_model.transcribe("audio.mp3", batch_size=16) 166 | 167 | for segment in segments: 168 | print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) 169 | ``` 170 | 171 | ### Faster Distil-Whisper 172 | 173 | The Distil-Whisper checkpoints are compatible with the Faster-Whisper package. In particular, the latest [distil-large-v3](https://huggingface.co/distil-whisper/distil-large-v3) 174 | checkpoint is intrinsically designed to work with the Faster-Whisper transcription algorithm. The following code snippet 175 | demonstrates how to run inference with distil-large-v3 on a specified audio file: 176 | 177 | ```python 178 | from faster_whisper import WhisperModel 179 | 180 | model_size = "distil-large-v3" 181 | 182 | model = WhisperModel(model_size, device="cuda", compute_type="float16") 183 | segments, info = model.transcribe("audio.mp3", beam_size=5, language="en", condition_on_previous_text=False) 184 | 185 | for segment in segments: 186 | print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) 187 | ``` 188 | 189 | For more information about the distil-large-v3 model, refer to the original [model card](https://huggingface.co/distil-whisper/distil-large-v3). 190 | 191 | ### Word-level timestamps 192 | 193 | ```python 194 | segments, _ = model.transcribe("audio.mp3", word_timestamps=True) 195 | 196 | for segment in segments: 197 | for word in segment.words: 198 | print("[%.2fs -> %.2fs] %s" % (word.start, word.end, word.word)) 199 | ``` 200 | 201 | ### VAD filter 202 | 203 | The library integrates the [Silero VAD](https://github.com/snakers4/silero-vad) model to filter out parts of the audio without speech: 204 | 205 | ```python 206 | segments, _ = model.transcribe("audio.mp3", vad_filter=True) 207 | ``` 208 | 209 | The default behavior is conservative and only removes silence longer than 2 seconds. 
See the available VAD parameters and default values in the [source code](https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/vad.py). They can be customized with the dictionary argument `vad_parameters`: 210 | 211 | ```python 212 | segments, _ = model.transcribe( 213 | "audio.mp3", 214 | vad_filter=True, 215 | vad_parameters=dict(min_silence_duration_ms=500), 216 | ) 217 | ``` 218 | The VAD filter is enabled by default for batched transcription. 219 | 220 | ### Logging 221 | 222 | The library logging level can be configured like this: 223 | 224 | ```python 225 | import logging 226 | 227 | logging.basicConfig() 228 | logging.getLogger("faster_whisper").setLevel(logging.DEBUG) 229 | ``` 230 | 231 | ### Going further 232 | 233 | See more model and transcription options in the [`WhisperModel`](https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/transcribe.py) class implementation. 234 | 235 | ## Community integrations 236 | 237 | Here is a non-exhaustive list of open-source projects using faster-whisper. Feel free to add your project to the list! 238 | 239 | 240 | * [speaches](https://github.com/speaches-ai/speaches) is an OpenAI-compatible server using `faster-whisper`. It is easily deployable with Docker, works with the OpenAI SDKs/CLI, and supports streaming and live transcription. 241 | * [WhisperX](https://github.com/m-bain/whisperX) is an award-winning Python library that offers speaker diarization and accurate word-level timestamps using wav2vec2 alignment. 242 | * [whisper-ctranslate2](https://github.com/Softcatala/whisper-ctranslate2) is a command-line client based on faster-whisper and compatible with the original client from openai/whisper. 243 | * [whisper-diarize](https://github.com/MahmoudAshraf97/whisper-diarization) is a speaker diarization tool based on faster-whisper and NVIDIA NeMo. 244 | * [whisper-standalone-win](https://github.com/Purfview/whisper-standalone-win) provides standalone CLI executables of faster-whisper for Windows, Linux & macOS. 245 | * [asr-sd-pipeline](https://github.com/hedrergudene/asr-sd-pipeline) provides a scalable, modular, end-to-end multi-speaker speech-to-text solution implemented using AzureML pipelines. 246 | * [Open-Lyrics](https://github.com/zh-plus/Open-Lyrics) is a Python library that transcribes voice files using faster-whisper and translates/polishes the resulting text into `.lrc` files in the desired language using OpenAI-GPT. 247 | * [wscribe](https://github.com/geekodour/wscribe) is a flexible transcript generation tool supporting faster-whisper; it can export word-level transcripts, which can then be edited with [wscribe-editor](https://github.com/geekodour/wscribe-editor). 248 | * [aTrain](https://github.com/BANDAS-Center/aTrain) is a graphical user interface implementation of faster-whisper developed at the BANDAS-Center at the University of Graz for transcription and diarization on Windows ([Windows Store App](https://apps.microsoft.com/detail/atrain/9N15Q44SZNS2)) and Linux. 249 | * [Whisper-Streaming](https://github.com/ufal/whisper_streaming) implements a real-time mode for offline Whisper-like speech-to-text models, with faster-whisper as the most recommended backend. It implements a streaming policy with self-adaptive latency based on the actual source complexity, and demonstrates the state of the art. 250 | * [WhisperLive](https://github.com/collabora/WhisperLive) is a nearly-live implementation of OpenAI's Whisper that uses faster-whisper as the backend to transcribe audio in real time.
251 | * [Faster-Whisper-Transcriber](https://github.com/BBC-Esq/ctranslate2-faster-whisper-transcriber) is a simple but reliable voice transcriber that provides a user-friendly interface. 252 | * [Open-dubbing](https://github.com/softcatala/open-dubbing) is an AI dubbing system that uses machine learning models to automatically translate and synchronize audio dialogue into different languages. 253 | 254 | ## Model conversion 255 | 256 | When loading a model by its size, such as `WhisperModel("large-v3")`, the corresponding CTranslate2 model is automatically downloaded from the [Hugging Face Hub](https://huggingface.co/Systran). 257 | 258 | We also provide a script to convert any Whisper model compatible with the Transformers library. These can be the original OpenAI models or user fine-tuned models. 259 | 260 | For example, the command below converts the [original "large-v3" Whisper model](https://huggingface.co/openai/whisper-large-v3) and saves the weights in FP16: 261 | 262 | ```bash 263 | pip install transformers[torch]>=4.23 264 | 265 | ct2-transformers-converter --model openai/whisper-large-v3 --output_dir whisper-large-v3-ct2 266 | --copy_files tokenizer.json preprocessor_config.json --quantization float16 267 | ``` 268 | 269 | * The option `--model` accepts a model name on the Hub or a path to a model directory. 270 | * If the option `--copy_files tokenizer.json` is not used, the tokenizer configuration is automatically downloaded when the model is loaded later. 271 | 272 | Models can also be converted from code. See the [conversion API](https://opennmt.net/CTranslate2/python/ctranslate2.converters.TransformersConverter.html). 273 | 274 | ### Load a converted model 275 | 276 | 1. Directly load the model from a local directory: 277 | ```python 278 | model = faster_whisper.WhisperModel("whisper-large-v3-ct2") 279 | ``` 280 | 281 | 2. [Upload your model to the Hugging Face Hub](https://huggingface.co/docs/transformers/model_sharing#upload-with-the-web-interface) and load it from its name: 282 | ```python 283 | model = faster_whisper.WhisperModel("username/whisper-large-v3-ct2") 284 | ``` 285 | 286 | ## Comparing performance against other implementations 287 | 288 | If you are comparing the performance against other Whisper implementations, you should make sure to run the comparison with similar settings. In particular: 289 | 290 | * Verify that the same transcription options are used, especially the same beam size. For example, in openai/whisper, `model.transcribe` uses a default beam size of 1, but here we use a default beam size of 5. 291 | * Transcription speed is closely affected by the number of words in the transcript, so ensure that other implementations have a similar WER (Word Error Rate) to this one. 292 | * When running on CPU, make sure to set the same number of threads.
Many frameworks will read the environment variable `OMP_NUM_THREADS`, which can be set when running your script: 293 | 294 | ```bash 295 | OMP_NUM_THREADS=4 python3 my_script.py 296 | ``` 297 | -------------------------------------------------------------------------------- /benchmark/benchmark.m4a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/benchmark/benchmark.m4a -------------------------------------------------------------------------------- /benchmark/evaluate_yt_commons.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | from io import BytesIO 6 | 7 | from datasets import load_dataset 8 | from jiwer import wer 9 | from pytubefix import YouTube 10 | from pytubefix.exceptions import VideoUnavailable 11 | from tqdm import tqdm 12 | from transformers.models.whisper.english_normalizer import EnglishTextNormalizer 13 | 14 | from faster_whisper import BatchedInferencePipeline, WhisperModel, decode_audio 15 | 16 | 17 | def url_to_audio(row): 18 | buffer = BytesIO() 19 | yt = YouTube(row["link"]) 20 | try: 21 | video = ( 22 | yt.streams.filter(only_audio=True, mime_type="audio/mp4") 23 | .order_by("bitrate") 24 | .desc() 25 | .last() 26 | ) 27 | video.stream_to_buffer(buffer) 28 | buffer.seek(0) 29 | row["audio"] = decode_audio(buffer) 30 | except VideoUnavailable: 31 | print(f'Failed to download: {row["link"]}') 32 | row["audio"] = [] 33 | return row 34 | 35 | 36 | parser = argparse.ArgumentParser(description="WER benchmark") 37 | parser.add_argument( 38 | "--audio_numb", 39 | type=int, 40 | default=None, 41 | help="Specify the number of validation audio files in the dataset." 
42 | " Set to None to retrieve all audio files.", 43 | ) 44 | args = parser.parse_args() 45 | 46 | with open(os.path.join(os.path.dirname(__file__), "normalizer.json"), "r") as f: 47 | normalizer = EnglishTextNormalizer(json.load(f)) 48 | 49 | dataset = load_dataset("mobiuslabsgmbh/youtube-commons-asr-eval", streaming=True).map( 50 | url_to_audio 51 | ) 52 | model = WhisperModel("large-v3", device="cuda") 53 | pipeline = BatchedInferencePipeline(model, device="cuda") 54 | 55 | 56 | all_transcriptions = [] 57 | all_references = [] 58 | # iterate over the dataset and run inference 59 | for i, row in tqdm(enumerate(dataset["test"]), desc="Evaluating..."): 60 | if not row["audio"]: 61 | continue 62 | result, info = pipeline.transcribe( 63 | row["audio"][0], 64 | batch_size=8, 65 | word_timestamps=False, 66 | without_timestamps=True, 67 | ) 68 | 69 | all_transcriptions.append("".join(segment.text for segment in result)) 70 | all_references.append(row["text"][0]) 71 | if args.audio_numb and i == (args.audio_numb - 1): 72 | break 73 | 74 | # normalize predictions and references 75 | all_transcriptions = [normalizer(transcription) for transcription in all_transcriptions] 76 | all_references = [normalizer(reference) for reference in all_references] 77 | 78 | # compute the WER metric 79 | word_error_rate = 100 * wer(hypothesis=all_transcriptions, reference=all_references) 80 | print("WER: %.3f" % word_error_rate) 81 | -------------------------------------------------------------------------------- /benchmark/memory_benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | from typing import Callable 5 | 6 | import py3nvml.py3nvml as nvml 7 | 8 | from memory_profiler import memory_usage 9 | from utils import MyThread, get_logger, inference 10 | 11 | logger = get_logger("faster-whisper") 12 | parser = argparse.ArgumentParser(description="Memory benchmark") 13 | parser.add_argument( 14 | "--gpu_memory", action="store_true", help="Measure GPU memory usage" 15 | ) 16 | parser.add_argument("--device-index", type=int, default=0, help="GPU device index") 17 | parser.add_argument( 18 | "--interval", 19 | type=float, 20 | default=0.5, 21 | help="Interval at which measurements are collected", 22 | ) 23 | args = parser.parse_args() 24 | device_idx = args.device_index 25 | interval = args.interval 26 | 27 | 28 | def measure_memory(func: Callable[[], None]): 29 | if args.gpu_memory: 30 | logger.info( 31 | "Measuring maximum GPU memory usage on GPU device." 32 | " Make sure to not have additional processes running on the same GPU." 
33 | ) 34 | # init nvml 35 | nvml.nvmlInit() 36 | handle = nvml.nvmlDeviceGetHandleByIndex(device_idx) 37 | gpu_name = nvml.nvmlDeviceGetName(handle) 38 | gpu_memory_limit = nvml.nvmlDeviceGetMemoryInfo(handle).total >> 20 39 | gpu_power_limit = nvml.nvmlDeviceGetPowerManagementLimit(handle) / 1000.0 40 | info = {"gpu_memory_usage": [], "gpu_power_usage": []} 41 | 42 | def _get_gpu_info(): 43 | while True: 44 | info["gpu_memory_usage"].append( 45 | nvml.nvmlDeviceGetMemoryInfo(handle).used >> 20 46 | ) 47 | info["gpu_power_usage"].append( 48 | nvml.nvmlDeviceGetPowerUsage(handle) / 1000 49 | ) 50 | time.sleep(interval) 51 | 52 | if stop: 53 | break 54 | 55 | return info 56 | 57 | stop = False 58 | thread = MyThread(_get_gpu_info, params=()) 59 | thread.start() 60 | func() 61 | stop = True 62 | thread.join() 63 | result = thread.get_result() 64 | 65 | # shutdown nvml 66 | nvml.nvmlShutdown() 67 | max_memory_usage = max(result["gpu_memory_usage"]) 68 | max_power_usage = max(result["gpu_power_usage"]) 69 | print("GPU name: %s" % gpu_name) 70 | print("GPU device index: %s" % device_idx) 71 | print( 72 | "Maximum GPU memory usage: %dMiB / %dMiB (%.2f%%)" 73 | % ( 74 | max_memory_usage, 75 | gpu_memory_limit, 76 | (max_memory_usage / gpu_memory_limit) * 100, 77 | ) 78 | ) 79 | print( 80 | "Maximum GPU power usage: %dW / %dW (%.2f%%)" 81 | % ( 82 | max_power_usage, 83 | gpu_power_limit, 84 | (max_power_usage / gpu_power_limit) * 100, 85 | ) 86 | ) 87 | else: 88 | logger.info("Measuring maximum increase of memory usage.") 89 | max_usage = memory_usage(func, max_usage=True, interval=interval) 90 | print("Maximum increase of RAM memory usage: %d MiB" % max_usage) 91 | 92 | 93 | if __name__ == "__main__": 94 | measure_memory(inference) 95 | -------------------------------------------------------------------------------- /benchmark/normalizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "accessorise": "accessorize", 3 | "accessorised": "accessorized", 4 | "accessorises": "accessorizes", 5 | "accessorising": "accessorizing", 6 | "acclimatisation": "acclimatization", 7 | "acclimatise": "acclimatize", 8 | "acclimatised": "acclimatized", 9 | "acclimatises": "acclimatizes", 10 | "acclimatising": "acclimatizing", 11 | "accoutrements": "accouterments", 12 | "aeon": "eon", 13 | "aeons": "eons", 14 | "aerogramme": "aerogram", 15 | "aerogrammes": "aerograms", 16 | "aeroplane": "airplane", 17 | "aeroplanes": "airplanes", 18 | "aesthete": "esthete", 19 | "aesthetes": "esthetes", 20 | "aesthetic": "esthetic", 21 | "aesthetically": "esthetically", 22 | "aesthetics": "esthetics", 23 | "aetiology": "etiology", 24 | "ageing": "aging", 25 | "aggrandisement": "aggrandizement", 26 | "agonise": "agonize", 27 | "agonised": "agonized", 28 | "agonises": "agonizes", 29 | "agonising": "agonizing", 30 | "agonisingly": "agonizingly", 31 | "almanack": "almanac", 32 | "almanacks": "almanacs", 33 | "aluminium": "aluminum", 34 | "amortisable": "amortizable", 35 | "amortisation": "amortization", 36 | "amortisations": "amortizations", 37 | "amortise": "amortize", 38 | "amortised": "amortized", 39 | "amortises": "amortizes", 40 | "amortising": "amortizing", 41 | "amphitheatre": "amphitheater", 42 | "amphitheatres": "amphitheaters", 43 | "anaemia": "anemia", 44 | "anaemic": "anemic", 45 | "anaesthesia": "anesthesia", 46 | "anaesthetic": "anesthetic", 47 | "anaesthetics": "anesthetics", 48 | "anaesthetise": "anesthetize", 49 | "anaesthetised": "anesthetized", 50 | "anaesthetises": 
"anesthetizes", 51 | "anaesthetising": "anesthetizing", 52 | "anaesthetist": "anesthetist", 53 | "anaesthetists": "anesthetists", 54 | "anaesthetize": "anesthetize", 55 | "anaesthetized": "anesthetized", 56 | "anaesthetizes": "anesthetizes", 57 | "anaesthetizing": "anesthetizing", 58 | "analogue": "analog", 59 | "analogues": "analogs", 60 | "analyse": "analyze", 61 | "analysed": "analyzed", 62 | "analyses": "analyzes", 63 | "analysing": "analyzing", 64 | "anglicise": "anglicize", 65 | "anglicised": "anglicized", 66 | "anglicises": "anglicizes", 67 | "anglicising": "anglicizing", 68 | "annualised": "annualized", 69 | "antagonise": "antagonize", 70 | "antagonised": "antagonized", 71 | "antagonises": "antagonizes", 72 | "antagonising": "antagonizing", 73 | "apologise": "apologize", 74 | "apologised": "apologized", 75 | "apologises": "apologizes", 76 | "apologising": "apologizing", 77 | "appal": "appall", 78 | "appals": "appalls", 79 | "appetiser": "appetizer", 80 | "appetisers": "appetizers", 81 | "appetising": "appetizing", 82 | "appetisingly": "appetizingly", 83 | "arbour": "arbor", 84 | "arbours": "arbors", 85 | "archaeologically": "archeologically", 86 | "archaeologist": "archeologist", 87 | "archaeologists": "archeologists", 88 | "archaeology": "archeology", 89 | "archeological": "archaeological", 90 | "ardour": "ardor", 91 | "armour": "armor", 92 | "armoured": "armored", 93 | "armourer": "armorer", 94 | "armourers": "armorers", 95 | "armouries": "armories", 96 | "armoury": "armory", 97 | "artefact": "artifact", 98 | "artefacts": "artifacts", 99 | "authorise": "authorize", 100 | "authorised": "authorized", 101 | "authorises": "authorizes", 102 | "authorising": "authorizing", 103 | "axe": "ax", 104 | "backpedalled": "backpedaled", 105 | "backpedalling": "backpedaling", 106 | "bannister": "banister", 107 | "bannisters": "banisters", 108 | "baptise": "baptize", 109 | "baptised": "baptized", 110 | "baptises": "baptizes", 111 | "baptising": "baptizing", 112 | "bastardise": "bastardize", 113 | "bastardised": "bastardized", 114 | "bastardises": "bastardizes", 115 | "bastardising": "bastardizing", 116 | "battleax": "battleaxe", 117 | "baulk": "balk", 118 | "baulked": "balked", 119 | "baulking": "balking", 120 | "baulks": "balks", 121 | "bedevilled": "bedeviled", 122 | "bedevilling": "bedeviling", 123 | "behaviour": "behavior", 124 | "behavioural": "behavioral", 125 | "behaviourism": "behaviorism", 126 | "behaviourist": "behaviorist", 127 | "behaviourists": "behaviorists", 128 | "behaviours": "behaviors", 129 | "behove": "behoove", 130 | "behoved": "behooved", 131 | "behoves": "behooves", 132 | "bejewelled": "bejeweled", 133 | "belabour": "belabor", 134 | "belaboured": "belabored", 135 | "belabouring": "belaboring", 136 | "belabours": "belabors", 137 | "bevelled": "beveled", 138 | "bevvies": "bevies", 139 | "bevvy": "bevy", 140 | "biassed": "biased", 141 | "biassing": "biasing", 142 | "bingeing": "binging", 143 | "bougainvillaea": "bougainvillea", 144 | "bougainvillaeas": "bougainvilleas", 145 | "bowdlerise": "bowdlerize", 146 | "bowdlerised": "bowdlerized", 147 | "bowdlerises": "bowdlerizes", 148 | "bowdlerising": "bowdlerizing", 149 | "breathalyse": "breathalyze", 150 | "breathalysed": "breathalyzed", 151 | "breathalyser": "breathalyzer", 152 | "breathalysers": "breathalyzers", 153 | "breathalyses": "breathalyzes", 154 | "breathalysing": "breathalyzing", 155 | "brutalise": "brutalize", 156 | "brutalised": "brutalized", 157 | "brutalises": "brutalizes", 158 | "brutalising": "brutalizing", 159 | 
"busses": "buses", 160 | "bussing": "busing", 161 | "caesarean": "cesarean", 162 | "caesareans": "cesareans", 163 | "calibre": "caliber", 164 | "calibres": "calibers", 165 | "calliper": "caliper", 166 | "callipers": "calipers", 167 | "callisthenics": "calisthenics", 168 | "canalise": "canalize", 169 | "canalised": "canalized", 170 | "canalises": "canalizes", 171 | "canalising": "canalizing", 172 | "cancelation": "cancellation", 173 | "cancelations": "cancellations", 174 | "cancelled": "canceled", 175 | "cancelling": "canceling", 176 | "candour": "candor", 177 | "cannibalise": "cannibalize", 178 | "cannibalised": "cannibalized", 179 | "cannibalises": "cannibalizes", 180 | "cannibalising": "cannibalizing", 181 | "canonise": "canonize", 182 | "canonised": "canonized", 183 | "canonises": "canonizes", 184 | "canonising": "canonizing", 185 | "capitalise": "capitalize", 186 | "capitalised": "capitalized", 187 | "capitalises": "capitalizes", 188 | "capitalising": "capitalizing", 189 | "caramelise": "caramelize", 190 | "caramelised": "caramelized", 191 | "caramelises": "caramelizes", 192 | "caramelising": "caramelizing", 193 | "carbonise": "carbonize", 194 | "carbonised": "carbonized", 195 | "carbonises": "carbonizes", 196 | "carbonising": "carbonizing", 197 | "carolled": "caroled", 198 | "carolling": "caroling", 199 | "catalogue": "catalog", 200 | "catalogued": "cataloged", 201 | "catalogues": "catalogs", 202 | "cataloguing": "cataloging", 203 | "catalyse": "catalyze", 204 | "catalysed": "catalyzed", 205 | "catalyses": "catalyzes", 206 | "catalysing": "catalyzing", 207 | "categorise": "categorize", 208 | "categorised": "categorized", 209 | "categorises": "categorizes", 210 | "categorising": "categorizing", 211 | "cauterise": "cauterize", 212 | "cauterised": "cauterized", 213 | "cauterises": "cauterizes", 214 | "cauterising": "cauterizing", 215 | "cavilled": "caviled", 216 | "cavilling": "caviling", 217 | "centigramme": "centigram", 218 | "centigrammes": "centigrams", 219 | "centilitre": "centiliter", 220 | "centilitres": "centiliters", 221 | "centimetre": "centimeter", 222 | "centimetres": "centimeters", 223 | "centralise": "centralize", 224 | "centralised": "centralized", 225 | "centralises": "centralizes", 226 | "centralising": "centralizing", 227 | "centre": "center", 228 | "centred": "centered", 229 | "centrefold": "centerfold", 230 | "centrefolds": "centerfolds", 231 | "centrepiece": "centerpiece", 232 | "centrepieces": "centerpieces", 233 | "centres": "centers", 234 | "channelled": "channeled", 235 | "channelling": "channeling", 236 | "characterise": "characterize", 237 | "characterised": "characterized", 238 | "characterises": "characterizes", 239 | "characterising": "characterizing", 240 | "cheque": "check", 241 | "chequebook": "checkbook", 242 | "chequebooks": "checkbooks", 243 | "chequered": "checkered", 244 | "cheques": "checks", 245 | "chilli": "chili", 246 | "chimaera": "chimera", 247 | "chimaeras": "chimeras", 248 | "chiselled": "chiseled", 249 | "chiselling": "chiseling", 250 | "circularise": "circularize", 251 | "circularised": "circularized", 252 | "circularises": "circularizes", 253 | "circularising": "circularizing", 254 | "civilise": "civilize", 255 | "civilised": "civilized", 256 | "civilises": "civilizes", 257 | "civilising": "civilizing", 258 | "clamour": "clamor", 259 | "clamoured": "clamored", 260 | "clamouring": "clamoring", 261 | "clamours": "clamors", 262 | "clangour": "clangor", 263 | "clarinettist": "clarinetist", 264 | "clarinettists": "clarinetists", 265 | 
"collectivise": "collectivize", 266 | "collectivised": "collectivized", 267 | "collectivises": "collectivizes", 268 | "collectivising": "collectivizing", 269 | "colonisation": "colonization", 270 | "colonise": "colonize", 271 | "colonised": "colonized", 272 | "coloniser": "colonizer", 273 | "colonisers": "colonizers", 274 | "colonises": "colonizes", 275 | "colonising": "colonizing", 276 | "colour": "color", 277 | "colourant": "colorant", 278 | "colourants": "colorants", 279 | "coloured": "colored", 280 | "coloureds": "coloreds", 281 | "colourful": "colorful", 282 | "colourfully": "colorfully", 283 | "colouring": "coloring", 284 | "colourize": "colorize", 285 | "colourized": "colorized", 286 | "colourizes": "colorizes", 287 | "colourizing": "colorizing", 288 | "colourless": "colorless", 289 | "colours": "colors", 290 | "commercialise": "commercialize", 291 | "commercialised": "commercialized", 292 | "commercialises": "commercializes", 293 | "commercialising": "commercializing", 294 | "compartmentalise": "compartmentalize", 295 | "compartmentalised": "compartmentalized", 296 | "compartmentalises": "compartmentalizes", 297 | "compartmentalising": "compartmentalizing", 298 | "computerise": "computerize", 299 | "computerised": "computerized", 300 | "computerises": "computerizes", 301 | "computerising": "computerizing", 302 | "conceptualise": "conceptualize", 303 | "conceptualised": "conceptualized", 304 | "conceptualises": "conceptualizes", 305 | "conceptualising": "conceptualizing", 306 | "connexion": "connection", 307 | "connexions": "connections", 308 | "contextualise": "contextualize", 309 | "contextualised": "contextualized", 310 | "contextualises": "contextualizes", 311 | "contextualising": "contextualizing", 312 | "cosier": "cozier", 313 | "cosies": "cozies", 314 | "cosiest": "coziest", 315 | "cosily": "cozily", 316 | "cosiness": "coziness", 317 | "cosy": "cozy", 318 | "councillor": "councilor", 319 | "councillors": "councilors", 320 | "counselled": "counseled", 321 | "counselling": "counseling", 322 | "counsellor": "counselor", 323 | "counsellors": "counselors", 324 | "crenelated": "crenellated", 325 | "criminalise": "criminalize", 326 | "criminalised": "criminalized", 327 | "criminalises": "criminalizes", 328 | "criminalising": "criminalizing", 329 | "criticise": "criticize", 330 | "criticised": "criticized", 331 | "criticises": "criticizes", 332 | "criticising": "criticizing", 333 | "crueller": "crueler", 334 | "cruellest": "cruelest", 335 | "crystallisation": "crystallization", 336 | "crystallise": "crystallize", 337 | "crystallised": "crystallized", 338 | "crystallises": "crystallizes", 339 | "crystallising": "crystallizing", 340 | "cudgelled": "cudgeled", 341 | "cudgelling": "cudgeling", 342 | "customise": "customize", 343 | "customised": "customized", 344 | "customises": "customizes", 345 | "customising": "customizing", 346 | "cypher": "cipher", 347 | "cyphers": "ciphers", 348 | "decentralisation": "decentralization", 349 | "decentralise": "decentralize", 350 | "decentralised": "decentralized", 351 | "decentralises": "decentralizes", 352 | "decentralising": "decentralizing", 353 | "decriminalisation": "decriminalization", 354 | "decriminalise": "decriminalize", 355 | "decriminalised": "decriminalized", 356 | "decriminalises": "decriminalizes", 357 | "decriminalising": "decriminalizing", 358 | "defence": "defense", 359 | "defenceless": "defenseless", 360 | "defences": "defenses", 361 | "dehumanisation": "dehumanization", 362 | "dehumanise": "dehumanize", 363 | "dehumanised": 
"dehumanized", 364 | "dehumanises": "dehumanizes", 365 | "dehumanising": "dehumanizing", 366 | "demeanour": "demeanor", 367 | "demilitarisation": "demilitarization", 368 | "demilitarise": "demilitarize", 369 | "demilitarised": "demilitarized", 370 | "demilitarises": "demilitarizes", 371 | "demilitarising": "demilitarizing", 372 | "demobilisation": "demobilization", 373 | "demobilise": "demobilize", 374 | "demobilised": "demobilized", 375 | "demobilises": "demobilizes", 376 | "demobilising": "demobilizing", 377 | "democratisation": "democratization", 378 | "democratise": "democratize", 379 | "democratised": "democratized", 380 | "democratises": "democratizes", 381 | "democratising": "democratizing", 382 | "demonise": "demonize", 383 | "demonised": "demonized", 384 | "demonises": "demonizes", 385 | "demonising": "demonizing", 386 | "demoralisation": "demoralization", 387 | "demoralise": "demoralize", 388 | "demoralised": "demoralized", 389 | "demoralises": "demoralizes", 390 | "demoralising": "demoralizing", 391 | "denationalisation": "denationalization", 392 | "denationalise": "denationalize", 393 | "denationalised": "denationalized", 394 | "denationalises": "denationalizes", 395 | "denationalising": "denationalizing", 396 | "deodorise": "deodorize", 397 | "deodorised": "deodorized", 398 | "deodorises": "deodorizes", 399 | "deodorising": "deodorizing", 400 | "depersonalise": "depersonalize", 401 | "depersonalised": "depersonalized", 402 | "depersonalises": "depersonalizes", 403 | "depersonalising": "depersonalizing", 404 | "deputise": "deputize", 405 | "deputised": "deputized", 406 | "deputises": "deputizes", 407 | "deputising": "deputizing", 408 | "desensitisation": "desensitization", 409 | "desensitise": "desensitize", 410 | "desensitised": "desensitized", 411 | "desensitises": "desensitizes", 412 | "desensitising": "desensitizing", 413 | "destabilisation": "destabilization", 414 | "destabilise": "destabilize", 415 | "destabilised": "destabilized", 416 | "destabilises": "destabilizes", 417 | "destabilising": "destabilizing", 418 | "dialled": "dialed", 419 | "dialling": "dialing", 420 | "dialogue": "dialog", 421 | "dialogues": "dialogs", 422 | "diarrhoea": "diarrhea", 423 | "digitise": "digitize", 424 | "digitised": "digitized", 425 | "digitises": "digitizes", 426 | "digitising": "digitizing", 427 | "disc": "disk", 428 | "discolour": "discolor", 429 | "discoloured": "discolored", 430 | "discolouring": "discoloring", 431 | "discolours": "discolors", 432 | "discs": "disks", 433 | "disembowelled": "disemboweled", 434 | "disembowelling": "disemboweling", 435 | "disfavour": "disfavor", 436 | "dishevelled": "disheveled", 437 | "dishonour": "dishonor", 438 | "dishonourable": "dishonorable", 439 | "dishonourably": "dishonorably", 440 | "dishonoured": "dishonored", 441 | "dishonouring": "dishonoring", 442 | "dishonours": "dishonors", 443 | "disorganisation": "disorganization", 444 | "disorganised": "disorganized", 445 | "distil": "distill", 446 | "distils": "distills", 447 | "dramatisation": "dramatization", 448 | "dramatisations": "dramatizations", 449 | "dramatise": "dramatize", 450 | "dramatised": "dramatized", 451 | "dramatises": "dramatizes", 452 | "dramatising": "dramatizing", 453 | "draught": "draft", 454 | "draughtboard": "draftboard", 455 | "draughtboards": "draftboards", 456 | "draughtier": "draftier", 457 | "draughtiest": "draftiest", 458 | "draughts": "drafts", 459 | "draughtsman": "draftsman", 460 | "draughtsmanship": "draftsmanship", 461 | "draughtsmen": "draftsmen", 462 | 
"draughtswoman": "draftswoman", 463 | "draughtswomen": "draftswomen", 464 | "draughty": "drafty", 465 | "drivelled": "driveled", 466 | "drivelling": "driveling", 467 | "duelled": "dueled", 468 | "duelling": "dueling", 469 | "economise": "economize", 470 | "economised": "economized", 471 | "economises": "economizes", 472 | "economising": "economizing", 473 | "editorialise": "editorialize", 474 | "editorialised": "editorialized", 475 | "editorialises": "editorializes", 476 | "editorialising": "editorializing", 477 | "edoema": "edema", 478 | "empathise": "empathize", 479 | "empathised": "empathized", 480 | "empathises": "empathizes", 481 | "empathising": "empathizing", 482 | "emphasise": "emphasize", 483 | "emphasised": "emphasized", 484 | "emphasises": "emphasizes", 485 | "emphasising": "emphasizing", 486 | "enamelled": "enameled", 487 | "enamelling": "enameling", 488 | "enamoured": "enamored", 489 | "encyclopaedia": "encyclopedia", 490 | "encyclopaedias": "encyclopedias", 491 | "encyclopaedic": "encyclopedic", 492 | "endeavour": "endeavor", 493 | "endeavoured": "endeavored", 494 | "endeavouring": "endeavoring", 495 | "endeavours": "endeavors", 496 | "energise": "energize", 497 | "energised": "energized", 498 | "energises": "energizes", 499 | "energising": "energizing", 500 | "enrol": "enroll", 501 | "enrols": "enrolls", 502 | "enthral": "enthrall", 503 | "enthrals": "enthralls", 504 | "epaulette": "epaulet", 505 | "epaulettes": "epaulets", 506 | "epicentre": "epicenter", 507 | "epicentres": "epicenters", 508 | "epilogue": "epilog", 509 | "epilogues": "epilogs", 510 | "epitomise": "epitomize", 511 | "epitomised": "epitomized", 512 | "epitomises": "epitomizes", 513 | "epitomising": "epitomizing", 514 | "equalisation": "equalization", 515 | "equalise": "equalize", 516 | "equalised": "equalized", 517 | "equaliser": "equalizer", 518 | "equalisers": "equalizers", 519 | "equalises": "equalizes", 520 | "equalising": "equalizing", 521 | "eulogise": "eulogize", 522 | "eulogised": "eulogized", 523 | "eulogises": "eulogizes", 524 | "eulogising": "eulogizing", 525 | "evangelise": "evangelize", 526 | "evangelised": "evangelized", 527 | "evangelises": "evangelizes", 528 | "evangelising": "evangelizing", 529 | "exorcise": "exorcize", 530 | "exorcised": "exorcized", 531 | "exorcises": "exorcizes", 532 | "exorcising": "exorcizing", 533 | "extemporisation": "extemporization", 534 | "extemporise": "extemporize", 535 | "extemporised": "extemporized", 536 | "extemporises": "extemporizes", 537 | "extemporising": "extemporizing", 538 | "externalisation": "externalization", 539 | "externalisations": "externalizations", 540 | "externalise": "externalize", 541 | "externalised": "externalized", 542 | "externalises": "externalizes", 543 | "externalising": "externalizing", 544 | "factorise": "factorize", 545 | "factorised": "factorized", 546 | "factorises": "factorizes", 547 | "factorising": "factorizing", 548 | "faecal": "fecal", 549 | "faeces": "feces", 550 | "familiarisation": "familiarization", 551 | "familiarise": "familiarize", 552 | "familiarised": "familiarized", 553 | "familiarises": "familiarizes", 554 | "familiarising": "familiarizing", 555 | "fantasise": "fantasize", 556 | "fantasised": "fantasized", 557 | "fantasises": "fantasizes", 558 | "fantasising": "fantasizing", 559 | "favour": "favor", 560 | "favourable": "favorable", 561 | "favourably": "favorably", 562 | "favoured": "favored", 563 | "favouring": "favoring", 564 | "favourite": "favorite", 565 | "favourites": "favorites", 566 | "favouritism": 
"favoritism", 567 | "favours": "favors", 568 | "feminise": "feminize", 569 | "feminised": "feminized", 570 | "feminises": "feminizes", 571 | "feminising": "feminizing", 572 | "fertilisation": "fertilization", 573 | "fertilise": "fertilize", 574 | "fertilised": "fertilized", 575 | "fertiliser": "fertilizer", 576 | "fertilisers": "fertilizers", 577 | "fertilises": "fertilizes", 578 | "fertilising": "fertilizing", 579 | "fervour": "fervor", 580 | "fibre": "fiber", 581 | "fibreglass": "fiberglass", 582 | "fibres": "fibers", 583 | "fictionalisation": "fictionalization", 584 | "fictionalisations": "fictionalizations", 585 | "fictionalise": "fictionalize", 586 | "fictionalised": "fictionalized", 587 | "fictionalises": "fictionalizes", 588 | "fictionalising": "fictionalizing", 589 | "fillet": "filet", 590 | "filleted": "fileted", 591 | "filleting": "fileting", 592 | "fillets": "filets", 593 | "finalisation": "finalization", 594 | "finalise": "finalize", 595 | "finalised": "finalized", 596 | "finalises": "finalizes", 597 | "finalising": "finalizing", 598 | "flautist": "flutist", 599 | "flautists": "flutists", 600 | "flavour": "flavor", 601 | "flavoured": "flavored", 602 | "flavouring": "flavoring", 603 | "flavourings": "flavorings", 604 | "flavourless": "flavorless", 605 | "flavours": "flavors", 606 | "flavoursome": "flavorsome", 607 | "flyer / flier": "flier / flyer", 608 | "foetal": "fetal", 609 | "foetid": "fetid", 610 | "foetus": "fetus", 611 | "foetuses": "fetuses", 612 | "formalisation": "formalization", 613 | "formalise": "formalize", 614 | "formalised": "formalized", 615 | "formalises": "formalizes", 616 | "formalising": "formalizing", 617 | "fossilisation": "fossilization", 618 | "fossilise": "fossilize", 619 | "fossilised": "fossilized", 620 | "fossilises": "fossilizes", 621 | "fossilising": "fossilizing", 622 | "fraternisation": "fraternization", 623 | "fraternise": "fraternize", 624 | "fraternised": "fraternized", 625 | "fraternises": "fraternizes", 626 | "fraternising": "fraternizing", 627 | "fulfil": "fulfill", 628 | "fulfilment": "fulfillment", 629 | "fulfils": "fulfills", 630 | "funnelled": "funneled", 631 | "funnelling": "funneling", 632 | "gage": "gauge", 633 | "gaged": "gauged", 634 | "gages": "gauges", 635 | "gaging": "gauging", 636 | "galvanise": "galvanize", 637 | "galvanised": "galvanized", 638 | "galvanises": "galvanizes", 639 | "galvanising": "galvanizing", 640 | "gambolled": "gamboled", 641 | "gambolling": "gamboling", 642 | "gaol": "jail", 643 | "gaolbird": "jailbird", 644 | "gaolbirds": "jailbirds", 645 | "gaolbreak": "jailbreak", 646 | "gaolbreaks": "jailbreaks", 647 | "gaoled": "jailed", 648 | "gaoler": "jailer", 649 | "gaolers": "jailers", 650 | "gaoling": "jailing", 651 | "gaols": "jails", 652 | "gasses": "gases", 653 | "generalisation": "generalization", 654 | "generalisations": "generalizations", 655 | "generalise": "generalize", 656 | "generalised": "generalized", 657 | "generalises": "generalizes", 658 | "generalising": "generalizing", 659 | "ghettoise": "ghettoize", 660 | "ghettoised": "ghettoized", 661 | "ghettoises": "ghettoizes", 662 | "ghettoising": "ghettoizing", 663 | "gipsies": "gypsies", 664 | "glamor": "glamour", 665 | "glamorise": "glamorize", 666 | "glamorised": "glamorized", 667 | "glamorises": "glamorizes", 668 | "glamorising": "glamorizing", 669 | "globalisation": "globalization", 670 | "globalise": "globalize", 671 | "globalised": "globalized", 672 | "globalises": "globalizes", 673 | "globalising": "globalizing", 674 | "glueing": "gluing", 675 | 
"goitre": "goiter", 676 | "goitres": "goiters", 677 | "gonorrhoea": "gonorrhea", 678 | "gramme": "gram", 679 | "grammes": "grams", 680 | "gravelled": "graveled", 681 | "grey": "gray", 682 | "greyed": "grayed", 683 | "greying": "graying", 684 | "greyish": "grayish", 685 | "greyness": "grayness", 686 | "greys": "grays", 687 | "grovelled": "groveled", 688 | "grovelling": "groveling", 689 | "groyne": "groin", 690 | "groynes": "groins", 691 | "gruelling": "grueling", 692 | "gruellingly": "gruelingly", 693 | "gryphon": "griffin", 694 | "gryphons": "griffins", 695 | "gynaecological": "gynecological", 696 | "gynaecologist": "gynecologist", 697 | "gynaecologists": "gynecologists", 698 | "gynaecology": "gynecology", 699 | "haematological": "hematological", 700 | "haematologist": "hematologist", 701 | "haematologists": "hematologists", 702 | "haematology": "hematology", 703 | "haemoglobin": "hemoglobin", 704 | "haemophilia": "hemophilia", 705 | "haemophiliac": "hemophiliac", 706 | "haemophiliacs": "hemophiliacs", 707 | "haemorrhage": "hemorrhage", 708 | "haemorrhaged": "hemorrhaged", 709 | "haemorrhages": "hemorrhages", 710 | "haemorrhaging": "hemorrhaging", 711 | "haemorrhoids": "hemorrhoids", 712 | "harbour": "harbor", 713 | "harboured": "harbored", 714 | "harbouring": "harboring", 715 | "harbours": "harbors", 716 | "harmonisation": "harmonization", 717 | "harmonise": "harmonize", 718 | "harmonised": "harmonized", 719 | "harmonises": "harmonizes", 720 | "harmonising": "harmonizing", 721 | "homoeopath": "homeopath", 722 | "homoeopathic": "homeopathic", 723 | "homoeopaths": "homeopaths", 724 | "homoeopathy": "homeopathy", 725 | "homogenise": "homogenize", 726 | "homogenised": "homogenized", 727 | "homogenises": "homogenizes", 728 | "homogenising": "homogenizing", 729 | "honour": "honor", 730 | "honourable": "honorable", 731 | "honourably": "honorably", 732 | "honoured": "honored", 733 | "honouring": "honoring", 734 | "honours": "honors", 735 | "hospitalisation": "hospitalization", 736 | "hospitalise": "hospitalize", 737 | "hospitalised": "hospitalized", 738 | "hospitalises": "hospitalizes", 739 | "hospitalising": "hospitalizing", 740 | "humanise": "humanize", 741 | "humanised": "humanized", 742 | "humanises": "humanizes", 743 | "humanising": "humanizing", 744 | "humour": "humor", 745 | "humoured": "humored", 746 | "humouring": "humoring", 747 | "humourless": "humorless", 748 | "humours": "humors", 749 | "hybridise": "hybridize", 750 | "hybridised": "hybridized", 751 | "hybridises": "hybridizes", 752 | "hybridising": "hybridizing", 753 | "hypnotise": "hypnotize", 754 | "hypnotised": "hypnotized", 755 | "hypnotises": "hypnotizes", 756 | "hypnotising": "hypnotizing", 757 | "hypothesise": "hypothesize", 758 | "hypothesised": "hypothesized", 759 | "hypothesises": "hypothesizes", 760 | "hypothesising": "hypothesizing", 761 | "idealisation": "idealization", 762 | "idealise": "idealize", 763 | "idealised": "idealized", 764 | "idealises": "idealizes", 765 | "idealising": "idealizing", 766 | "idolise": "idolize", 767 | "idolised": "idolized", 768 | "idolises": "idolizes", 769 | "idolising": "idolizing", 770 | "immobilisation": "immobilization", 771 | "immobilise": "immobilize", 772 | "immobilised": "immobilized", 773 | "immobiliser": "immobilizer", 774 | "immobilisers": "immobilizers", 775 | "immobilises": "immobilizes", 776 | "immobilising": "immobilizing", 777 | "immortalise": "immortalize", 778 | "immortalised": "immortalized", 779 | "immortalises": "immortalizes", 780 | "immortalising": "immortalizing", 
781 | "immunisation": "immunization", 782 | "immunise": "immunize", 783 | "immunised": "immunized", 784 | "immunises": "immunizes", 785 | "immunising": "immunizing", 786 | "impanelled": "impaneled", 787 | "impanelling": "impaneling", 788 | "imperilled": "imperiled", 789 | "imperilling": "imperiling", 790 | "individualise": "individualize", 791 | "individualised": "individualized", 792 | "individualises": "individualizes", 793 | "individualising": "individualizing", 794 | "industrialise": "industrialize", 795 | "industrialised": "industrialized", 796 | "industrialises": "industrializes", 797 | "industrialising": "industrializing", 798 | "inflexion": "inflection", 799 | "inflexions": "inflections", 800 | "initialise": "initialize", 801 | "initialised": "initialized", 802 | "initialises": "initializes", 803 | "initialising": "initializing", 804 | "initialled": "initialed", 805 | "initialling": "initialing", 806 | "instal": "install", 807 | "instalment": "installment", 808 | "instalments": "installments", 809 | "instals": "installs", 810 | "instil": "instill", 811 | "instils": "instills", 812 | "institutionalisation": "institutionalization", 813 | "institutionalise": "institutionalize", 814 | "institutionalised": "institutionalized", 815 | "institutionalises": "institutionalizes", 816 | "institutionalising": "institutionalizing", 817 | "intellectualise": "intellectualize", 818 | "intellectualised": "intellectualized", 819 | "intellectualises": "intellectualizes", 820 | "intellectualising": "intellectualizing", 821 | "internalisation": "internalization", 822 | "internalise": "internalize", 823 | "internalised": "internalized", 824 | "internalises": "internalizes", 825 | "internalising": "internalizing", 826 | "internationalisation": "internationalization", 827 | "internationalise": "internationalize", 828 | "internationalised": "internationalized", 829 | "internationalises": "internationalizes", 830 | "internationalising": "internationalizing", 831 | "ionisation": "ionization", 832 | "ionise": "ionize", 833 | "ionised": "ionized", 834 | "ioniser": "ionizer", 835 | "ionisers": "ionizers", 836 | "ionises": "ionizes", 837 | "ionising": "ionizing", 838 | "italicise": "italicize", 839 | "italicised": "italicized", 840 | "italicises": "italicizes", 841 | "italicising": "italicizing", 842 | "itemise": "itemize", 843 | "itemised": "itemized", 844 | "itemises": "itemizes", 845 | "itemising": "itemizing", 846 | "jeopardise": "jeopardize", 847 | "jeopardised": "jeopardized", 848 | "jeopardises": "jeopardizes", 849 | "jeopardising": "jeopardizing", 850 | "jewelled": "jeweled", 851 | "jeweller": "jeweler", 852 | "jewellers": "jewelers", 853 | "jewellery": "jewelry", 854 | "judgement": "judgment", 855 | "kilogramme": "kilogram", 856 | "kilogrammes": "kilograms", 857 | "kilometre": "kilometer", 858 | "kilometres": "kilometers", 859 | "labelled": "labeled", 860 | "labelling": "labeling", 861 | "labour": "labor", 862 | "laboured": "labored", 863 | "labourer": "laborer", 864 | "labourers": "laborers", 865 | "labouring": "laboring", 866 | "labours": "labors", 867 | "lacklustre": "lackluster", 868 | "legalisation": "legalization", 869 | "legalise": "legalize", 870 | "legalised": "legalized", 871 | "legalises": "legalizes", 872 | "legalising": "legalizing", 873 | "legitimise": "legitimize", 874 | "legitimised": "legitimized", 875 | "legitimises": "legitimizes", 876 | "legitimising": "legitimizing", 877 | "leukaemia": "leukemia", 878 | "levelled": "leveled", 879 | "leveller": "leveler", 880 | "levellers": 
"levelers", 881 | "levelling": "leveling", 882 | "libelled": "libeled", 883 | "libelling": "libeling", 884 | "libellous": "libelous", 885 | "liberalisation": "liberalization", 886 | "liberalise": "liberalize", 887 | "liberalised": "liberalized", 888 | "liberalises": "liberalizes", 889 | "liberalising": "liberalizing", 890 | "licence": "license", 891 | "licenced": "licensed", 892 | "licences": "licenses", 893 | "licencing": "licensing", 894 | "likeable": "likable", 895 | "lionisation": "lionization", 896 | "lionise": "lionize", 897 | "lionised": "lionized", 898 | "lionises": "lionizes", 899 | "lionising": "lionizing", 900 | "liquidise": "liquidize", 901 | "liquidised": "liquidized", 902 | "liquidiser": "liquidizer", 903 | "liquidisers": "liquidizers", 904 | "liquidises": "liquidizes", 905 | "liquidising": "liquidizing", 906 | "litre": "liter", 907 | "litres": "liters", 908 | "localise": "localize", 909 | "localised": "localized", 910 | "localises": "localizes", 911 | "localising": "localizing", 912 | "louvre": "louver", 913 | "louvred": "louvered", 914 | "louvres": "louvers", 915 | "lustre": "luster", 916 | "magnetise": "magnetize", 917 | "magnetised": "magnetized", 918 | "magnetises": "magnetizes", 919 | "magnetising": "magnetizing", 920 | "manoeuvrability": "maneuverability", 921 | "manoeuvrable": "maneuverable", 922 | "manoeuvre": "maneuver", 923 | "manoeuvred": "maneuvered", 924 | "manoeuvres": "maneuvers", 925 | "manoeuvring": "maneuvering", 926 | "manoeuvrings": "maneuverings", 927 | "marginalisation": "marginalization", 928 | "marginalise": "marginalize", 929 | "marginalised": "marginalized", 930 | "marginalises": "marginalizes", 931 | "marginalising": "marginalizing", 932 | "marshalled": "marshaled", 933 | "marshalling": "marshaling", 934 | "marvelled": "marveled", 935 | "marvelling": "marveling", 936 | "marvellous": "marvelous", 937 | "marvellously": "marvelously", 938 | "materialisation": "materialization", 939 | "materialise": "materialize", 940 | "materialised": "materialized", 941 | "materialises": "materializes", 942 | "materialising": "materializing", 943 | "maximisation": "maximization", 944 | "maximise": "maximize", 945 | "maximised": "maximized", 946 | "maximises": "maximizes", 947 | "maximising": "maximizing", 948 | "meagre": "meager", 949 | "mechanisation": "mechanization", 950 | "mechanise": "mechanize", 951 | "mechanised": "mechanized", 952 | "mechanises": "mechanizes", 953 | "mechanising": "mechanizing", 954 | "mediaeval": "medieval", 955 | "memorialise": "memorialize", 956 | "memorialised": "memorialized", 957 | "memorialises": "memorializes", 958 | "memorialising": "memorializing", 959 | "memorise": "memorize", 960 | "memorised": "memorized", 961 | "memorises": "memorizes", 962 | "memorising": "memorizing", 963 | "mesmerise": "mesmerize", 964 | "mesmerised": "mesmerized", 965 | "mesmerises": "mesmerizes", 966 | "mesmerising": "mesmerizing", 967 | "metabolise": "metabolize", 968 | "metabolised": "metabolized", 969 | "metabolises": "metabolizes", 970 | "metabolising": "metabolizing", 971 | "metre": "meter", 972 | "metres": "meters", 973 | "mhm": "hmm", 974 | "micrometre": "micrometer", 975 | "micrometres": "micrometers", 976 | "militarise": "militarize", 977 | "militarised": "militarized", 978 | "militarises": "militarizes", 979 | "militarising": "militarizing", 980 | "milligramme": "milligram", 981 | "milligrammes": "milligrams", 982 | "millilitre": "milliliter", 983 | "millilitres": "milliliters", 984 | "millimetre": "millimeter", 985 | "millimetres": "millimeters", 
986 | "miniaturisation": "miniaturization", 987 | "miniaturise": "miniaturize", 988 | "miniaturised": "miniaturized", 989 | "miniaturises": "miniaturizes", 990 | "miniaturising": "miniaturizing", 991 | "minibusses": "minibuses", 992 | "minimise": "minimize", 993 | "minimised": "minimized", 994 | "minimises": "minimizes", 995 | "minimising": "minimizing", 996 | "misbehaviour": "misbehavior", 997 | "misdemeanour": "misdemeanor", 998 | "misdemeanours": "misdemeanors", 999 | "misspelt": "misspelled", 1000 | "mitre": "miter", 1001 | "mitres": "miters", 1002 | "mm": "hmm", 1003 | "mmm": "hmm", 1004 | "mobilisation": "mobilization", 1005 | "mobilise": "mobilize", 1006 | "mobilised": "mobilized", 1007 | "mobilises": "mobilizes", 1008 | "mobilising": "mobilizing", 1009 | "modelled": "modeled", 1010 | "modeller": "modeler", 1011 | "modellers": "modelers", 1012 | "modelling": "modeling", 1013 | "modernise": "modernize", 1014 | "modernised": "modernized", 1015 | "modernises": "modernizes", 1016 | "modernising": "modernizing", 1017 | "moisturise": "moisturize", 1018 | "moisturised": "moisturized", 1019 | "moisturiser": "moisturizer", 1020 | "moisturisers": "moisturizers", 1021 | "moisturises": "moisturizes", 1022 | "moisturising": "moisturizing", 1023 | "monologue": "monolog", 1024 | "monologues": "monologs", 1025 | "monopolisation": "monopolization", 1026 | "monopolise": "monopolize", 1027 | "monopolised": "monopolized", 1028 | "monopolises": "monopolizes", 1029 | "monopolising": "monopolizing", 1030 | "moralise": "moralize", 1031 | "moralised": "moralized", 1032 | "moralises": "moralizes", 1033 | "moralising": "moralizing", 1034 | "motorised": "motorized", 1035 | "mould": "mold", 1036 | "moulded": "molded", 1037 | "moulder": "molder", 1038 | "mouldered": "moldered", 1039 | "mouldering": "moldering", 1040 | "moulders": "molders", 1041 | "mouldier": "moldier", 1042 | "mouldiest": "moldiest", 1043 | "moulding": "molding", 1044 | "mouldings": "moldings", 1045 | "moulds": "molds", 1046 | "mouldy": "moldy", 1047 | "moult": "molt", 1048 | "moulted": "molted", 1049 | "moulting": "molting", 1050 | "moults": "molts", 1051 | "moustache": "mustache", 1052 | "moustached": "mustached", 1053 | "moustaches": "mustaches", 1054 | "moustachioed": "mustachioed", 1055 | "multicoloured": "multicolored", 1056 | "nationalisation": "nationalization", 1057 | "nationalisations": "nationalizations", 1058 | "nationalise": "nationalize", 1059 | "nationalised": "nationalized", 1060 | "nationalises": "nationalizes", 1061 | "nationalising": "nationalizing", 1062 | "naturalisation": "naturalization", 1063 | "naturalise": "naturalize", 1064 | "naturalised": "naturalized", 1065 | "naturalises": "naturalizes", 1066 | "naturalising": "naturalizing", 1067 | "neighbour": "neighbor", 1068 | "neighbourhood": "neighborhood", 1069 | "neighbourhoods": "neighborhoods", 1070 | "neighbouring": "neighboring", 1071 | "neighbourliness": "neighborliness", 1072 | "neighbourly": "neighborly", 1073 | "neighbours": "neighbors", 1074 | "neutralisation": "neutralization", 1075 | "neutralise": "neutralize", 1076 | "neutralised": "neutralized", 1077 | "neutralises": "neutralizes", 1078 | "neutralising": "neutralizing", 1079 | "normalisation": "normalization", 1080 | "normalise": "normalize", 1081 | "normalised": "normalized", 1082 | "normalises": "normalizes", 1083 | "normalising": "normalizing", 1084 | "odour": "odor", 1085 | "odourless": "odorless", 1086 | "odours": "odors", 1087 | "oesophagus": "esophagus", 1088 | "oesophaguses": "esophaguses", 1089 | 
"oestrogen": "estrogen", 1090 | "offence": "offense", 1091 | "offences": "offenses", 1092 | "omelette": "omelet", 1093 | "omelettes": "omelets", 1094 | "optimise": "optimize", 1095 | "optimised": "optimized", 1096 | "optimises": "optimizes", 1097 | "optimising": "optimizing", 1098 | "organisation": "organization", 1099 | "organisational": "organizational", 1100 | "organisations": "organizations", 1101 | "organise": "organize", 1102 | "organised": "organized", 1103 | "organiser": "organizer", 1104 | "organisers": "organizers", 1105 | "organises": "organizes", 1106 | "organising": "organizing", 1107 | "orthopaedic": "orthopedic", 1108 | "orthopaedics": "orthopedics", 1109 | "ostracise": "ostracize", 1110 | "ostracised": "ostracized", 1111 | "ostracises": "ostracizes", 1112 | "ostracising": "ostracizing", 1113 | "outmanoeuvre": "outmaneuver", 1114 | "outmanoeuvred": "outmaneuvered", 1115 | "outmanoeuvres": "outmaneuvers", 1116 | "outmanoeuvring": "outmaneuvering", 1117 | "overemphasise": "overemphasize", 1118 | "overemphasised": "overemphasized", 1119 | "overemphasises": "overemphasizes", 1120 | "overemphasising": "overemphasizing", 1121 | "oxidisation": "oxidization", 1122 | "oxidise": "oxidize", 1123 | "oxidised": "oxidized", 1124 | "oxidises": "oxidizes", 1125 | "oxidising": "oxidizing", 1126 | "paederast": "pederast", 1127 | "paederasts": "pederasts", 1128 | "paediatric": "pediatric", 1129 | "paediatrician": "pediatrician", 1130 | "paediatricians": "pediatricians", 1131 | "paediatrics": "pediatrics", 1132 | "paedophile": "pedophile", 1133 | "paedophiles": "pedophiles", 1134 | "paedophilia": "pedophilia", 1135 | "palaeolithic": "paleolithic", 1136 | "palaeontologist": "paleontologist", 1137 | "palaeontologists": "paleontologists", 1138 | "palaeontology": "paleontology", 1139 | "panelled": "paneled", 1140 | "panelling": "paneling", 1141 | "panellist": "panelist", 1142 | "panellists": "panelists", 1143 | "paralyse": "paralyze", 1144 | "paralysed": "paralyzed", 1145 | "paralyses": "paralyzes", 1146 | "paralysing": "paralyzing", 1147 | "parcelled": "parceled", 1148 | "parcelling": "parceling", 1149 | "parlour": "parlor", 1150 | "parlours": "parlors", 1151 | "particularise": "particularize", 1152 | "particularised": "particularized", 1153 | "particularises": "particularizes", 1154 | "particularising": "particularizing", 1155 | "passivisation": "passivization", 1156 | "passivise": "passivize", 1157 | "passivised": "passivized", 1158 | "passivises": "passivizes", 1159 | "passivising": "passivizing", 1160 | "pasteurisation": "pasteurization", 1161 | "pasteurise": "pasteurize", 1162 | "pasteurised": "pasteurized", 1163 | "pasteurises": "pasteurizes", 1164 | "pasteurising": "pasteurizing", 1165 | "patronise": "patronize", 1166 | "patronised": "patronized", 1167 | "patronises": "patronizes", 1168 | "patronising": "patronizing", 1169 | "patronisingly": "patronizingly", 1170 | "pedalled": "pedaled", 1171 | "pedalling": "pedaling", 1172 | "pedestrianisation": "pedestrianization", 1173 | "pedestrianise": "pedestrianize", 1174 | "pedestrianised": "pedestrianized", 1175 | "pedestrianises": "pedestrianizes", 1176 | "pedestrianising": "pedestrianizing", 1177 | "penalise": "penalize", 1178 | "penalised": "penalized", 1179 | "penalises": "penalizes", 1180 | "penalising": "penalizing", 1181 | "pencilled": "penciled", 1182 | "pencilling": "penciling", 1183 | "personalise": "personalize", 1184 | "personalised": "personalized", 1185 | "personalises": "personalizes", 1186 | "personalising": "personalizing", 1187 | 
"pharmacopoeia": "pharmacopeia", 1188 | "pharmacopoeias": "pharmacopeias", 1189 | "philosophise": "philosophize", 1190 | "philosophised": "philosophized", 1191 | "philosophises": "philosophizes", 1192 | "philosophising": "philosophizing", 1193 | "philtre": "filter", 1194 | "philtres": "filters", 1195 | "phoney": "phony", 1196 | "plagiarise": "plagiarize", 1197 | "plagiarised": "plagiarized", 1198 | "plagiarises": "plagiarizes", 1199 | "plagiarising": "plagiarizing", 1200 | "plough": "plow", 1201 | "ploughed": "plowed", 1202 | "ploughing": "plowing", 1203 | "ploughman": "plowman", 1204 | "ploughmen": "plowmen", 1205 | "ploughs": "plows", 1206 | "ploughshare": "plowshare", 1207 | "ploughshares": "plowshares", 1208 | "polarisation": "polarization", 1209 | "polarise": "polarize", 1210 | "polarised": "polarized", 1211 | "polarises": "polarizes", 1212 | "polarising": "polarizing", 1213 | "politicisation": "politicization", 1214 | "politicise": "politicize", 1215 | "politicised": "politicized", 1216 | "politicises": "politicizes", 1217 | "politicising": "politicizing", 1218 | "popularisation": "popularization", 1219 | "popularise": "popularize", 1220 | "popularised": "popularized", 1221 | "popularises": "popularizes", 1222 | "popularising": "popularizing", 1223 | "pouffe": "pouf", 1224 | "pouffes": "poufs", 1225 | "practise": "practice", 1226 | "practised": "practiced", 1227 | "practises": "practices", 1228 | "practising": "practicing", 1229 | "praesidium": "presidium", 1230 | "praesidiums": "presidiums", 1231 | "pressurisation": "pressurization", 1232 | "pressurise": "pressurize", 1233 | "pressurised": "pressurized", 1234 | "pressurises": "pressurizes", 1235 | "pressurising": "pressurizing", 1236 | "pretence": "pretense", 1237 | "pretences": "pretenses", 1238 | "primaeval": "primeval", 1239 | "prioritisation": "prioritization", 1240 | "prioritise": "prioritize", 1241 | "prioritised": "prioritized", 1242 | "prioritises": "prioritizes", 1243 | "prioritising": "prioritizing", 1244 | "privatisation": "privatization", 1245 | "privatisations": "privatizations", 1246 | "privatise": "privatize", 1247 | "privatised": "privatized", 1248 | "privatises": "privatizes", 1249 | "privatising": "privatizing", 1250 | "professionalisation": "professionalization", 1251 | "professionalise": "professionalize", 1252 | "professionalised": "professionalized", 1253 | "professionalises": "professionalizes", 1254 | "professionalising": "professionalizing", 1255 | "programme": "program", 1256 | "programmes": "programs", 1257 | "prologue": "prolog", 1258 | "prologues": "prologs", 1259 | "propagandise": "propagandize", 1260 | "propagandised": "propagandized", 1261 | "propagandises": "propagandizes", 1262 | "propagandising": "propagandizing", 1263 | "proselytise": "proselytize", 1264 | "proselytised": "proselytized", 1265 | "proselytiser": "proselytizer", 1266 | "proselytisers": "proselytizers", 1267 | "proselytises": "proselytizes", 1268 | "proselytising": "proselytizing", 1269 | "psychoanalyse": "psychoanalyze", 1270 | "psychoanalysed": "psychoanalyzed", 1271 | "psychoanalyses": "psychoanalyzes", 1272 | "psychoanalysing": "psychoanalyzing", 1273 | "publicise": "publicize", 1274 | "publicised": "publicized", 1275 | "publicises": "publicizes", 1276 | "publicising": "publicizing", 1277 | "pulverisation": "pulverization", 1278 | "pulverise": "pulverize", 1279 | "pulverised": "pulverized", 1280 | "pulverises": "pulverizes", 1281 | "pulverising": "pulverizing", 1282 | "pummelled": "pummel", 1283 | "pummelling": "pummeled", 1284 | 
"pyjama": "pajama", 1285 | "pyjamas": "pajamas", 1286 | "pzazz": "pizzazz", 1287 | "quarrelled": "quarreled", 1288 | "quarrelling": "quarreling", 1289 | "radicalise": "radicalize", 1290 | "radicalised": "radicalized", 1291 | "radicalises": "radicalizes", 1292 | "radicalising": "radicalizing", 1293 | "rancour": "rancor", 1294 | "randomise": "randomize", 1295 | "randomised": "randomized", 1296 | "randomises": "randomizes", 1297 | "randomising": "randomizing", 1298 | "rationalisation": "rationalization", 1299 | "rationalisations": "rationalizations", 1300 | "rationalise": "rationalize", 1301 | "rationalised": "rationalized", 1302 | "rationalises": "rationalizes", 1303 | "rationalising": "rationalizing", 1304 | "ravelled": "raveled", 1305 | "ravelling": "raveling", 1306 | "realisable": "realizable", 1307 | "realisation": "realization", 1308 | "realisations": "realizations", 1309 | "realise": "realize", 1310 | "realised": "realized", 1311 | "realises": "realizes", 1312 | "realising": "realizing", 1313 | "recognisable": "recognizable", 1314 | "recognisably": "recognizably", 1315 | "recognisance": "recognizance", 1316 | "recognise": "recognize", 1317 | "recognised": "recognized", 1318 | "recognises": "recognizes", 1319 | "recognising": "recognizing", 1320 | "reconnoitre": "reconnoiter", 1321 | "reconnoitred": "reconnoitered", 1322 | "reconnoitres": "reconnoiters", 1323 | "reconnoitring": "reconnoitering", 1324 | "refuelled": "refueled", 1325 | "refuelling": "refueling", 1326 | "regularisation": "regularization", 1327 | "regularise": "regularize", 1328 | "regularised": "regularized", 1329 | "regularises": "regularizes", 1330 | "regularising": "regularizing", 1331 | "remodelled": "remodeled", 1332 | "remodelling": "remodeling", 1333 | "remould": "remold", 1334 | "remoulded": "remolded", 1335 | "remoulding": "remolding", 1336 | "remoulds": "remolds", 1337 | "reorganisation": "reorganization", 1338 | "reorganisations": "reorganizations", 1339 | "reorganise": "reorganize", 1340 | "reorganised": "reorganized", 1341 | "reorganises": "reorganizes", 1342 | "reorganising": "reorganizing", 1343 | "revelled": "reveled", 1344 | "reveller": "reveler", 1345 | "revellers": "revelers", 1346 | "revelling": "reveling", 1347 | "revitalise": "revitalize", 1348 | "revitalised": "revitalized", 1349 | "revitalises": "revitalizes", 1350 | "revitalising": "revitalizing", 1351 | "revolutionise": "revolutionize", 1352 | "revolutionised": "revolutionized", 1353 | "revolutionises": "revolutionizes", 1354 | "revolutionising": "revolutionizing", 1355 | "rhapsodise": "rhapsodize", 1356 | "rhapsodised": "rhapsodized", 1357 | "rhapsodises": "rhapsodizes", 1358 | "rhapsodising": "rhapsodizing", 1359 | "rigour": "rigor", 1360 | "rigours": "rigors", 1361 | "ritualised": "ritualized", 1362 | "rivalled": "rivaled", 1363 | "rivalling": "rivaling", 1364 | "romanticise": "romanticize", 1365 | "romanticised": "romanticized", 1366 | "romanticises": "romanticizes", 1367 | "romanticising": "romanticizing", 1368 | "rumour": "rumor", 1369 | "rumoured": "rumored", 1370 | "rumours": "rumors", 1371 | "sabre": "saber", 1372 | "sabres": "sabers", 1373 | "saltpetre": "saltpeter", 1374 | "sanitise": "sanitize", 1375 | "sanitised": "sanitized", 1376 | "sanitises": "sanitizes", 1377 | "sanitising": "sanitizing", 1378 | "satirise": "satirize", 1379 | "satirised": "satirized", 1380 | "satirises": "satirizes", 1381 | "satirising": "satirizing", 1382 | "saviour": "savior", 1383 | "saviours": "saviors", 1384 | "savour": "savor", 1385 | "savoured": "savored", 
1386 | "savouries": "savories", 1387 | "savouring": "savoring", 1388 | "savours": "savors", 1389 | "savoury": "savory", 1390 | "scandalise": "scandalize", 1391 | "scandalised": "scandalized", 1392 | "scandalises": "scandalizes", 1393 | "scandalising": "scandalizing", 1394 | "sceptic": "skeptic", 1395 | "sceptical": "skeptical", 1396 | "sceptically": "skeptically", 1397 | "scepticism": "skepticism", 1398 | "sceptics": "skeptics", 1399 | "sceptre": "scepter", 1400 | "sceptres": "scepters", 1401 | "scrutinise": "scrutinize", 1402 | "scrutinised": "scrutinized", 1403 | "scrutinises": "scrutinizes", 1404 | "scrutinising": "scrutinizing", 1405 | "secularisation": "secularization", 1406 | "secularise": "secularize", 1407 | "secularised": "secularized", 1408 | "secularises": "secularizes", 1409 | "secularising": "secularizing", 1410 | "sensationalise": "sensationalize", 1411 | "sensationalised": "sensationalized", 1412 | "sensationalises": "sensationalizes", 1413 | "sensationalising": "sensationalizing", 1414 | "sensitise": "sensitize", 1415 | "sensitised": "sensitized", 1416 | "sensitises": "sensitizes", 1417 | "sensitising": "sensitizing", 1418 | "sentimentalise": "sentimentalize", 1419 | "sentimentalised": "sentimentalized", 1420 | "sentimentalises": "sentimentalizes", 1421 | "sentimentalising": "sentimentalizing", 1422 | "sepulchre": "sepulcher", 1423 | "sepulchres": "sepulchers", 1424 | "serialisation": "serialization", 1425 | "serialisations": "serializations", 1426 | "serialise": "serialize", 1427 | "serialised": "serialized", 1428 | "serialises": "serializes", 1429 | "serialising": "serializing", 1430 | "sermonise": "sermonize", 1431 | "sermonised": "sermonized", 1432 | "sermonises": "sermonizes", 1433 | "sermonising": "sermonizing", 1434 | "sheikh": "sheik", 1435 | "shovelled": "shoveled", 1436 | "shovelling": "shoveling", 1437 | "shrivelled": "shriveled", 1438 | "shrivelling": "shriveling", 1439 | "signalise": "signalize", 1440 | "signalised": "signalized", 1441 | "signalises": "signalizes", 1442 | "signalising": "signalizing", 1443 | "signalled": "signaled", 1444 | "signalling": "signaling", 1445 | "smoulder": "smolder", 1446 | "smouldered": "smoldered", 1447 | "smouldering": "smoldering", 1448 | "smoulders": "smolders", 1449 | "snivelled": "sniveled", 1450 | "snivelling": "sniveling", 1451 | "snorkelled": "snorkeled", 1452 | "snorkelling": "snorkeling", 1453 | "snowplough": "snowplow", 1454 | "snowploughs": "snowplow", 1455 | "socialisation": "socialization", 1456 | "socialise": "socialize", 1457 | "socialised": "socialized", 1458 | "socialises": "socializes", 1459 | "socialising": "socializing", 1460 | "sodomise": "sodomize", 1461 | "sodomised": "sodomized", 1462 | "sodomises": "sodomizes", 1463 | "sodomising": "sodomizing", 1464 | "solemnise": "solemnize", 1465 | "solemnised": "solemnized", 1466 | "solemnises": "solemnizes", 1467 | "solemnising": "solemnizing", 1468 | "sombre": "somber", 1469 | "specialisation": "specialization", 1470 | "specialisations": "specializations", 1471 | "specialise": "specialize", 1472 | "specialised": "specialized", 1473 | "specialises": "specializes", 1474 | "specialising": "specializing", 1475 | "spectre": "specter", 1476 | "spectres": "specters", 1477 | "spiralled": "spiraled", 1478 | "spiralling": "spiraling", 1479 | "splendour": "splendor", 1480 | "splendours": "splendors", 1481 | "squirrelled": "squirreled", 1482 | "squirrelling": "squirreling", 1483 | "stabilisation": "stabilization", 1484 | "stabilise": "stabilize", 1485 | "stabilised": 
"stabilized", 1486 | "stabiliser": "stabilizer", 1487 | "stabilisers": "stabilizers", 1488 | "stabilises": "stabilizes", 1489 | "stabilising": "stabilizing", 1490 | "standardisation": "standardization", 1491 | "standardise": "standardize", 1492 | "standardised": "standardized", 1493 | "standardises": "standardizes", 1494 | "standardising": "standardizing", 1495 | "stencilled": "stenciled", 1496 | "stencilling": "stenciling", 1497 | "sterilisation": "sterilization", 1498 | "sterilisations": "sterilizations", 1499 | "sterilise": "sterilize", 1500 | "sterilised": "sterilized", 1501 | "steriliser": "sterilizer", 1502 | "sterilisers": "sterilizers", 1503 | "sterilises": "sterilizes", 1504 | "sterilising": "sterilizing", 1505 | "stigmatisation": "stigmatization", 1506 | "stigmatise": "stigmatize", 1507 | "stigmatised": "stigmatized", 1508 | "stigmatises": "stigmatizes", 1509 | "stigmatising": "stigmatizing", 1510 | "storey": "story", 1511 | "storeys": "stories", 1512 | "subsidisation": "subsidization", 1513 | "subsidise": "subsidize", 1514 | "subsidised": "subsidized", 1515 | "subsidiser": "subsidizer", 1516 | "subsidisers": "subsidizers", 1517 | "subsidises": "subsidizes", 1518 | "subsidising": "subsidizing", 1519 | "succour": "succor", 1520 | "succoured": "succored", 1521 | "succouring": "succoring", 1522 | "succours": "succors", 1523 | "sulphate": "sulfate", 1524 | "sulphates": "sulfates", 1525 | "sulphide": "sulfide", 1526 | "sulphides": "sulfides", 1527 | "sulphur": "sulfur", 1528 | "sulphurous": "sulfurous", 1529 | "summarise": "summarize", 1530 | "summarised": "summarized", 1531 | "summarises": "summarizes", 1532 | "summarising": "summarizing", 1533 | "swivelled": "swiveled", 1534 | "swivelling": "swiveling", 1535 | "symbolise": "symbolize", 1536 | "symbolised": "symbolized", 1537 | "symbolises": "symbolizes", 1538 | "symbolising": "symbolizing", 1539 | "sympathise": "sympathize", 1540 | "sympathised": "sympathized", 1541 | "sympathiser": "sympathizer", 1542 | "sympathisers": "sympathizers", 1543 | "sympathises": "sympathizes", 1544 | "sympathising": "sympathizing", 1545 | "synchronisation": "synchronization", 1546 | "synchronise": "synchronize", 1547 | "synchronised": "synchronized", 1548 | "synchronises": "synchronizes", 1549 | "synchronising": "synchronizing", 1550 | "synthesise": "synthesize", 1551 | "synthesised": "synthesized", 1552 | "synthesiser": "synthesizer", 1553 | "synthesisers": "synthesizers", 1554 | "synthesises": "synthesizes", 1555 | "synthesising": "synthesizing", 1556 | "syphon": "siphon", 1557 | "syphoned": "siphoned", 1558 | "syphoning": "siphoning", 1559 | "syphons": "siphons", 1560 | "systematisation": "systematization", 1561 | "systematise": "systematize", 1562 | "systematised": "systematized", 1563 | "systematises": "systematizes", 1564 | "systematising": "systematizing", 1565 | "tantalise": "tantalize", 1566 | "tantalised": "tantalized", 1567 | "tantalises": "tantalizes", 1568 | "tantalising": "tantalizing", 1569 | "tantalisingly": "tantalizingly", 1570 | "tasselled": "tasseled", 1571 | "technicolour": "technicolor", 1572 | "temporise": "temporize", 1573 | "temporised": "temporized", 1574 | "temporises": "temporizes", 1575 | "temporising": "temporizing", 1576 | "tenderise": "tenderize", 1577 | "tenderised": "tenderized", 1578 | "tenderises": "tenderizes", 1579 | "tenderising": "tenderizing", 1580 | "terrorise": "terrorize", 1581 | "terrorised": "terrorized", 1582 | "terrorises": "terrorizes", 1583 | "terrorising": "terrorizing", 1584 | "theatre": "theater", 1585 
| "theatregoer": "theatergoer", 1586 | "theatregoers": "theatergoers", 1587 | "theatres": "theaters", 1588 | "theorise": "theorize", 1589 | "theorised": "theorized", 1590 | "theorises": "theorizes", 1591 | "theorising": "theorizing", 1592 | "tonne": "ton", 1593 | "tonnes": "tons", 1594 | "towelled": "toweled", 1595 | "towelling": "toweling", 1596 | "toxaemia": "toxemia", 1597 | "tranquillise": "tranquilize", 1598 | "tranquillised": "tranquilized", 1599 | "tranquilliser": "tranquilizer", 1600 | "tranquillisers": "tranquilizers", 1601 | "tranquillises": "tranquilizes", 1602 | "tranquillising": "tranquilizing", 1603 | "tranquillity": "tranquility", 1604 | "tranquillize": "tranquilize", 1605 | "tranquillized": "tranquilized", 1606 | "tranquillizer": "tranquilizer", 1607 | "tranquillizers": "tranquilizers", 1608 | "tranquillizes": "tranquilizes", 1609 | "tranquillizing": "tranquilizing", 1610 | "tranquilly": "tranquility", 1611 | "transistorised": "transistorized", 1612 | "traumatise": "traumatize", 1613 | "traumatised": "traumatized", 1614 | "traumatises": "traumatizes", 1615 | "traumatising": "traumatizing", 1616 | "travelled": "traveled", 1617 | "traveller": "traveler", 1618 | "travellers": "travelers", 1619 | "travelling": "traveling", 1620 | "travelog": "travelogue", 1621 | "travelogs": "travelogues", 1622 | "trialled": "trialed", 1623 | "trialling": "trialing", 1624 | "tricolour": "tricolor", 1625 | "tricolours": "tricolors", 1626 | "trivialise": "trivialize", 1627 | "trivialised": "trivialized", 1628 | "trivialises": "trivializes", 1629 | "trivialising": "trivializing", 1630 | "tumour": "tumor", 1631 | "tumours": "tumors", 1632 | "tunnelled": "tunneled", 1633 | "tunnelling": "tunneling", 1634 | "tyrannise": "tyrannize", 1635 | "tyrannised": "tyrannized", 1636 | "tyrannises": "tyrannizes", 1637 | "tyrannising": "tyrannizing", 1638 | "tyre": "tire", 1639 | "tyres": "tires", 1640 | "unauthorised": "unauthorized", 1641 | "uncivilised": "uncivilized", 1642 | "underutilised": "underutilized", 1643 | "unequalled": "unequaled", 1644 | "unfavourable": "unfavorable", 1645 | "unfavourably": "unfavorably", 1646 | "unionisation": "unionization", 1647 | "unionise": "unionize", 1648 | "unionised": "unionized", 1649 | "unionises": "unionizes", 1650 | "unionising": "unionizing", 1651 | "unorganised": "unorganized", 1652 | "unravelled": "unraveled", 1653 | "unravelling": "unraveling", 1654 | "unrecognisable": "unrecognizable", 1655 | "unrecognised": "unrecognized", 1656 | "unrivalled": "unrivaled", 1657 | "unsavoury": "unsavory", 1658 | "untrammelled": "untrammeled", 1659 | "urbanisation": "urbanization", 1660 | "urbanise": "urbanize", 1661 | "urbanised": "urbanized", 1662 | "urbanises": "urbanizes", 1663 | "urbanising": "urbanizing", 1664 | "utilisable": "utilizable", 1665 | "utilisation": "utilization", 1666 | "utilise": "utilize", 1667 | "utilised": "utilized", 1668 | "utilises": "utilizes", 1669 | "utilising": "utilizing", 1670 | "valour": "valor", 1671 | "vandalise": "vandalize", 1672 | "vandalised": "vandalized", 1673 | "vandalises": "vandalizes", 1674 | "vandalising": "vandalizing", 1675 | "vaporisation": "vaporization", 1676 | "vaporise": "vaporize", 1677 | "vaporised": "vaporized", 1678 | "vaporises": "vaporizes", 1679 | "vaporising": "vaporizing", 1680 | "vapour": "vapor", 1681 | "vapours": "vapors", 1682 | "verbalise": "verbalize", 1683 | "verbalised": "verbalized", 1684 | "verbalises": "verbalizes", 1685 | "verbalising": "verbalizing", 1686 | "victimisation": "victimization", 1687 | 
"victimise": "victimize", 1688 | "victimised": "victimized", 1689 | "victimises": "victimizes", 1690 | "victimising": "victimizing", 1691 | "videodisc": "videodisk", 1692 | "videodiscs": "videodisks", 1693 | "vigour": "vigor", 1694 | "visualisation": "visualization", 1695 | "visualisations": "visualizations", 1696 | "visualise": "visualize", 1697 | "visualised": "visualized", 1698 | "visualises": "visualizes", 1699 | "visualising": "visualizing", 1700 | "vocalisation": "vocalization", 1701 | "vocalisations": "vocalizations", 1702 | "vocalise": "vocalize", 1703 | "vocalised": "vocalized", 1704 | "vocalises": "vocalizes", 1705 | "vocalising": "vocalizing", 1706 | "vulcanised": "vulcanized", 1707 | "vulgarisation": "vulgarization", 1708 | "vulgarise": "vulgarize", 1709 | "vulgarised": "vulgarized", 1710 | "vulgarises": "vulgarizes", 1711 | "vulgarising": "vulgarizing", 1712 | "waggon": "wagon", 1713 | "waggons": "wagons", 1714 | "watercolour": "watercolor", 1715 | "watercolours": "watercolors", 1716 | "weaselled": "weaseled", 1717 | "weaselling": "weaseling", 1718 | "westernisation": "westernization", 1719 | "westernise": "westernize", 1720 | "westernised": "westernized", 1721 | "westernises": "westernizes", 1722 | "westernising": "westernizing", 1723 | "womanise": "womanize", 1724 | "womanised": "womanized", 1725 | "womaniser": "womanizer", 1726 | "womanisers": "womanizers", 1727 | "womanises": "womanizes", 1728 | "womanising": "womanizing", 1729 | "woollen": "woolen", 1730 | "woollens": "woolens", 1731 | "woollies": "woolies", 1732 | "woolly": "wooly", 1733 | "worshipped": "worshiped", 1734 | "worshipper": "worshiper", 1735 | "worshipping": "worshiping", 1736 | "yodelled": "yodeled", 1737 | "yodelling": "yodeling", 1738 | "yoghourt": "yogurt", 1739 | "yoghourts": "yogurts", 1740 | "yoghurt": "yogurt", 1741 | "yoghurts": "yogurts" 1742 | } 1743 | -------------------------------------------------------------------------------- /benchmark/requirements.benchmark.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | jiwer 3 | datasets 4 | memory_profiler 5 | py3nvml 6 | pytubefix 7 | -------------------------------------------------------------------------------- /benchmark/speed_benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import timeit 3 | 4 | from typing import Callable 5 | 6 | from utils import inference 7 | 8 | parser = argparse.ArgumentParser(description="Speed benchmark") 9 | parser.add_argument( 10 | "--repeat", 11 | type=int, 12 | default=3, 13 | help="Times an experiment will be run.", 14 | ) 15 | args = parser.parse_args() 16 | 17 | 18 | def measure_speed(func: Callable[[], None]): 19 | # as written in https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat, 20 | # min should be taken rather than the average 21 | runtimes = timeit.repeat( 22 | func, 23 | repeat=args.repeat, 24 | number=10, 25 | ) 26 | print(runtimes) 27 | print("Min execution time: %.3fs" % (min(runtimes) / 10.0)) 28 | 29 | 30 | if __name__ == "__main__": 31 | measure_speed(inference) 32 | -------------------------------------------------------------------------------- /benchmark/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from threading import Thread 4 | from typing import Optional 5 | 6 | from faster_whisper import WhisperModel 7 | 8 | model_path = "large-v3" 9 | model = WhisperModel(model_path, 
device="cuda") 10 | 11 | 12 | def inference(): 13 | segments, info = model.transcribe("benchmark.m4a", language="fr") 14 | for segment in segments: 15 | print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) 16 | 17 | 18 | def get_logger(name: Optional[str] = None) -> logging.Logger: 19 | formatter = logging.Formatter("%(levelname)s: %(message)s") 20 | logger = logging.getLogger(name) 21 | logger.setLevel(logging.DEBUG) 22 | handler = logging.StreamHandler() 23 | handler.setFormatter(formatter) 24 | logger.addHandler(handler) 25 | return logger 26 | 27 | 28 | class MyThread(Thread): 29 | def __init__(self, func, params): 30 | super(MyThread, self).__init__() 31 | self.func = func 32 | self.params = params 33 | self.result = None 34 | 35 | def run(self): 36 | self.result = self.func(*self.params) 37 | 38 | def get_result(self): 39 | return self.result 40 | -------------------------------------------------------------------------------- /benchmark/wer_benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | from datasets import load_dataset 6 | from jiwer import wer 7 | from tqdm import tqdm 8 | from transformers.models.whisper.english_normalizer import EnglishTextNormalizer 9 | 10 | from faster_whisper import WhisperModel 11 | 12 | parser = argparse.ArgumentParser(description="WER benchmark") 13 | parser.add_argument( 14 | "--audio_numb", 15 | type=int, 16 | default=None, 17 | help="Specify the number of validation audio files in the dataset." 18 | " Set to None to retrieve all audio files.", 19 | ) 20 | args = parser.parse_args() 21 | 22 | model_path = "large-v3" 23 | model = WhisperModel(model_path, device="cuda") 24 | 25 | # load the dataset with streaming mode 26 | dataset = load_dataset("librispeech_asr", "clean", split="validation", streaming=True) 27 | 28 | with open(os.path.join(os.path.dirname(__file__), "normalizer.json"), "r") as f: 29 | normalizer = EnglishTextNormalizer(json.load(f)) 30 | 31 | 32 | def inference(batch): 33 | batch["transcription"] = [] 34 | for sample in batch["audio"]: 35 | segments, info = model.transcribe(sample["array"], language="en") 36 | batch["transcription"].append("".join([segment.text for segment in segments])) 37 | batch["reference"] = batch["text"] 38 | return batch 39 | 40 | 41 | dataset = dataset.map(function=inference, batched=True, batch_size=16) 42 | 43 | all_transcriptions = [] 44 | all_references = [] 45 | 46 | # iterate over the dataset and run inference 47 | for i, result in tqdm(enumerate(dataset), desc="Evaluating..."): 48 | all_transcriptions.append(result["transcription"]) 49 | all_references.append(result["reference"]) 50 | if args.audio_numb and i == (args.audio_numb - 1): 51 | break 52 | 53 | # normalize predictions and references 54 | all_transcriptions = [normalizer(transcription) for transcription in all_transcriptions] 55 | all_references = [normalizer(reference) for reference in all_references] 56 | 57 | # compute the WER metric 58 | word_error_rate = 100 * wer(hypothesis=all_transcriptions, reference=all_references) 59 | print("WER: %.3f" % word_error_rate) 60 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 2 | WORKDIR /root 3 | RUN apt-get update -y && apt-get install -y python3-pip 4 | COPY infer.py jfk.flac ./ 5 | RUN pip3 
install faster-whisper 6 | CMD ["python3", "infer.py"] 7 | -------------------------------------------------------------------------------- /docker/infer.py: -------------------------------------------------------------------------------- 1 | from faster_whisper import WhisperModel 2 | 3 | jfk_path = "jfk.flac" 4 | model = WhisperModel("tiny", device="cuda") 5 | segments, info = model.transcribe(jfk_path, word_timestamps=True) 6 | for segment in segments: 7 | print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) 8 | -------------------------------------------------------------------------------- /docker/jfk.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/docker/jfk.flac -------------------------------------------------------------------------------- /faster_whisper/__init__.py: -------------------------------------------------------------------------------- 1 | from faster_whisper.audio import decode_audio 2 | from faster_whisper.transcribe import BatchedInferencePipeline, WhisperModel 3 | from faster_whisper.utils import available_models, download_model, format_timestamp 4 | from faster_whisper.version import __version__ 5 | 6 | __all__ = [ 7 | "available_models", 8 | "decode_audio", 9 | "WhisperModel", 10 | "BatchedInferencePipeline", 11 | "download_model", 12 | "format_timestamp", 13 | "__version__", 14 | ] 15 | -------------------------------------------------------------------------------- /faster_whisper/assets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/faster_whisper/assets/__init__.py -------------------------------------------------------------------------------- /faster_whisper/assets/silero_decoder_v5.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/faster_whisper/assets/silero_decoder_v5.onnx -------------------------------------------------------------------------------- /faster_whisper/assets/silero_encoder_v5.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/faster_whisper/assets/silero_encoder_v5.onnx -------------------------------------------------------------------------------- /faster_whisper/audio.py: -------------------------------------------------------------------------------- 1 | """We use the PyAV library to decode the audio: https://github.com/PyAV-Org/PyAV 2 | 3 | The advantage of PyAV is that it bundles the FFmpeg libraries so there is no additional 4 | system dependencies. FFmpeg does not need to be installed on the system. 5 | 6 | However, the API is quite low-level so we need to manipulate audio frames directly. 7 | """ 8 | 9 | import gc 10 | import io 11 | import itertools 12 | 13 | from typing import BinaryIO, Union 14 | 15 | import av 16 | import numpy as np 17 | 18 | 19 | def decode_audio( 20 | input_file: Union[str, BinaryIO], 21 | sampling_rate: int = 16000, 22 | split_stereo: bool = False, 23 | ): 24 | """Decodes the audio. 25 | 26 | Args: 27 | input_file: Path to the input file or a file-like object. 28 | sampling_rate: Resample the audio to this sample rate. 
29 | split_stereo: Return separate left and right channels. 30 | 31 | Returns: 32 | A float32 Numpy array. 33 | 34 | If `split_stereo` is enabled, the function returns a 2-tuple with the 35 | separated left and right channels. 36 | """ 37 | resampler = av.audio.resampler.AudioResampler( 38 | format="s16", 39 | layout="mono" if not split_stereo else "stereo", 40 | rate=sampling_rate, 41 | ) 42 | 43 | raw_buffer = io.BytesIO() 44 | dtype = None 45 | 46 | with av.open(input_file, mode="r", metadata_errors="ignore") as container: 47 | frames = container.decode(audio=0) 48 | frames = _ignore_invalid_frames(frames) 49 | frames = _group_frames(frames, 500000) 50 | frames = _resample_frames(frames, resampler) 51 | 52 | for frame in frames: 53 | array = frame.to_ndarray() 54 | dtype = array.dtype 55 | raw_buffer.write(array) 56 | 57 | # It appears that some objects related to the resampler are not freed 58 | # unless the garbage collector is manually run. 59 | # https://github.com/SYSTRAN/faster-whisper/issues/390 60 | # note that this slows down loading the audio a little bit 61 | # if that is a concern, please use ffmpeg directly as in here: 62 | # https://github.com/openai/whisper/blob/25639fc/whisper/audio.py#L25-L62 63 | del resampler 64 | gc.collect() 65 | 66 | audio = np.frombuffer(raw_buffer.getbuffer(), dtype=dtype) 67 | 68 | # Convert s16 back to f32. 69 | audio = audio.astype(np.float32) / 32768.0 70 | 71 | if split_stereo: 72 | left_channel = audio[0::2] 73 | right_channel = audio[1::2] 74 | return left_channel, right_channel 75 | 76 | return audio 77 | 78 | 79 | def _ignore_invalid_frames(frames): 80 | iterator = iter(frames) 81 | 82 | while True: 83 | try: 84 | yield next(iterator) 85 | except StopIteration: 86 | break 87 | except av.error.InvalidDataError: 88 | continue 89 | 90 | 91 | def _group_frames(frames, num_samples=None): 92 | fifo = av.audio.fifo.AudioFifo() 93 | 94 | for frame in frames: 95 | frame.pts = None # Ignore timestamp check. 96 | fifo.write(frame) 97 | 98 | if num_samples is not None and fifo.samples >= num_samples: 99 | yield fifo.read() 100 | 101 | if fifo.samples > 0: 102 | yield fifo.read() 103 | 104 | 105 | def _resample_frames(frames, resampler): 106 | # Add None to flush the resampler. 107 | for frame in itertools.chain(frames, [None]): 108 | yield from resampler.resample(frame) 109 | 110 | 111 | def pad_or_trim(array, length: int = 3000, *, axis: int = -1): 112 | """ 113 | Pad or trim the Mel features array to 3000, as expected by the encoder. 
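For example (an illustrative sketch, assuming the usual (80, n_frames) Mel layout): an (80, 2500) array is zero-padded on the right to (80, 3000), while an (80, 3200) array is trimmed to its first 3000 frames along `axis`, so that afterwards array.shape[axis] == length.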
114 | """ 115 | if array.shape[axis] > length: 116 | array = array.take(indices=range(length), axis=axis) 117 | 118 | if array.shape[axis] < length: 119 | pad_widths = [(0, 0)] * array.ndim 120 | pad_widths[axis] = (0, length - array.shape[axis]) 121 | array = np.pad(array, pad_widths) 122 | 123 | return array 124 | -------------------------------------------------------------------------------- /faster_whisper/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class FeatureExtractor: 5 | def __init__( 6 | self, 7 | feature_size=80, 8 | sampling_rate=16000, 9 | hop_length=160, 10 | chunk_length=30, 11 | n_fft=400, 12 | ): 13 | self.n_fft = n_fft 14 | self.hop_length = hop_length 15 | self.chunk_length = chunk_length 16 | self.n_samples = chunk_length * sampling_rate 17 | self.nb_max_frames = self.n_samples // hop_length 18 | self.time_per_frame = hop_length / sampling_rate 19 | self.sampling_rate = sampling_rate 20 | self.mel_filters = self.get_mel_filters( 21 | sampling_rate, n_fft, n_mels=feature_size 22 | ).astype("float32") 23 | 24 | @staticmethod 25 | def get_mel_filters(sr, n_fft, n_mels=128): 26 | # Initialize the weights 27 | n_mels = int(n_mels) 28 | 29 | # Center freqs of each FFT bin 30 | fftfreqs = np.fft.rfftfreq(n=n_fft, d=1.0 / sr) 31 | 32 | # 'Center freqs' of mel bands - uniformly spaced between limits 33 | min_mel = 0.0 34 | max_mel = 45.245640471924965 35 | 36 | mels = np.linspace(min_mel, max_mel, n_mels + 2) 37 | 38 | # Fill in the linear scale 39 | f_min = 0.0 40 | f_sp = 200.0 / 3 41 | freqs = f_min + f_sp * mels 42 | 43 | # And now the nonlinear scale 44 | min_log_hz = 1000.0 # beginning of log region (Hz) 45 | min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels) 46 | logstep = np.log(6.4) / 27.0 # step size for log region 47 | 48 | # If we have vector data, vectorize 49 | log_t = mels >= min_log_mel 50 | freqs[log_t] = min_log_hz * np.exp(logstep * (mels[log_t] - min_log_mel)) 51 | 52 | fdiff = np.diff(freqs) 53 | ramps = freqs.reshape(-1, 1) - fftfreqs.reshape(1, -1) 54 | 55 | lower = -ramps[:-2] / np.expand_dims(fdiff[:-1], axis=1) 56 | upper = ramps[2:] / np.expand_dims(fdiff[1:], axis=1) 57 | 58 | # Intersect them with each other and zero, vectorized across all i 59 | weights = np.maximum(np.zeros_like(lower), np.minimum(lower, upper)) 60 | 61 | # Slaney-style mel is scaled to be approx constant energy per channel 62 | enorm = 2.0 / (freqs[2 : n_mels + 2] - freqs[:n_mels]) 63 | weights *= np.expand_dims(enorm, axis=1) 64 | 65 | return weights 66 | 67 | @staticmethod 68 | def stft( 69 | input_array: np.ndarray, 70 | n_fft: int, 71 | hop_length: int = None, 72 | win_length: int = None, 73 | window: np.ndarray = None, 74 | center: bool = True, 75 | mode: str = "reflect", 76 | normalized: bool = False, 77 | onesided: bool = None, 78 | return_complex: bool = None, 79 | ): 80 | # Default initialization for hop_length and win_length 81 | hop_length = hop_length if hop_length is not None else n_fft // 4 82 | win_length = win_length if win_length is not None else n_fft 83 | input_is_complex = np.iscomplexobj(input_array) 84 | 85 | # Determine if the output should be complex 86 | return_complex = ( 87 | return_complex 88 | if return_complex is not None 89 | else (input_is_complex or (window is not None and np.iscomplexobj(window))) 90 | ) 91 | 92 | if not return_complex and return_complex is None: 93 | raise ValueError( 94 | "stft requires the return_complex parameter for real 
inputs." 95 | ) 96 | 97 | # Input checks 98 | if not np.issubdtype(input_array.dtype, np.floating) and not input_is_complex: 99 | raise ValueError( 100 | "stft: expected an array of floating point or complex values," 101 | f" got {input_array.dtype}" 102 | ) 103 | 104 | if input_array.ndim > 2 or input_array.ndim < 1: 105 | raise ValueError( 106 | f"stft: expected a 1D or 2D array, but got {input_array.ndim}D array" 107 | ) 108 | 109 | # Handle 1D input 110 | if input_array.ndim == 1: 111 | input_array = np.expand_dims(input_array, axis=0) 112 | input_array_1d = True 113 | else: 114 | input_array_1d = False 115 | 116 | # Center padding if required 117 | if center: 118 | pad_amount = n_fft // 2 119 | input_array = np.pad( 120 | input_array, ((0, 0), (pad_amount, pad_amount)), mode=mode 121 | ) 122 | 123 | batch, length = input_array.shape 124 | 125 | # Additional input checks 126 | if n_fft <= 0 or n_fft > length: 127 | raise ValueError( 128 | f"stft: expected 0 < n_fft <= {length}, but got n_fft={n_fft}" 129 | ) 130 | 131 | if hop_length <= 0: 132 | raise ValueError( 133 | f"stft: expected hop_length > 0, but got hop_length={hop_length}" 134 | ) 135 | 136 | if win_length <= 0 or win_length > n_fft: 137 | raise ValueError( 138 | f"stft: expected 0 < win_length <= n_fft, but got win_length={win_length}" 139 | ) 140 | 141 | if window is not None: 142 | if window.ndim != 1 or window.shape[0] != win_length: 143 | raise ValueError( 144 | f"stft: expected a 1D window array of size equal to win_length={win_length}, " 145 | f"but got window with size {window.shape}" 146 | ) 147 | 148 | # Handle padding of the window if necessary 149 | if win_length < n_fft: 150 | left = (n_fft - win_length) // 2 151 | window_ = np.zeros(n_fft, dtype=window.dtype) 152 | window_[left : left + win_length] = window 153 | else: 154 | window_ = window 155 | 156 | # Calculate the number of frames 157 | n_frames = 1 + (length - n_fft) // hop_length 158 | 159 | # Time to columns 160 | input_array = np.lib.stride_tricks.as_strided( 161 | input_array, 162 | (batch, n_frames, n_fft), 163 | ( 164 | input_array.strides[0], 165 | hop_length * input_array.strides[1], 166 | input_array.strides[1], 167 | ), 168 | ) 169 | 170 | if window_ is not None: 171 | input_array = input_array * window_ 172 | 173 | # FFT and transpose 174 | complex_fft = input_is_complex 175 | onesided = onesided if onesided is not None else not complex_fft 176 | 177 | if normalized: 178 | norm = "ortho" 179 | else: 180 | norm = None 181 | 182 | if complex_fft: 183 | if onesided: 184 | raise ValueError( 185 | "Cannot have onesided output if window or input is complex" 186 | ) 187 | output = np.fft.fft(input_array, n=n_fft, axis=-1, norm=norm) 188 | else: 189 | output = np.fft.rfft(input_array, n=n_fft, axis=-1, norm=norm) 190 | 191 | output = output.transpose((0, 2, 1)) 192 | 193 | if input_array_1d: 194 | output = output.squeeze(0) 195 | 196 | return output if return_complex else np.real(output) 197 | 198 | def __call__(self, waveform: np.ndarray, padding=160, chunk_length=None): 199 | """ 200 | Compute the log-Mel spectrogram of the provided audio. 
201 | """ 202 | 203 | if chunk_length is not None: 204 | self.n_samples = chunk_length * self.sampling_rate 205 | self.nb_max_frames = self.n_samples // self.hop_length 206 | 207 | if waveform.dtype is not np.float32: 208 | waveform = waveform.astype(np.float32) 209 | 210 | if padding: 211 | waveform = np.pad(waveform, (0, padding)) 212 | 213 | window = np.hanning(self.n_fft + 1)[:-1].astype("float32") 214 | 215 | stft = self.stft( 216 | waveform, 217 | self.n_fft, 218 | self.hop_length, 219 | window=window, 220 | return_complex=True, 221 | ).astype("complex64") 222 | magnitudes = np.abs(stft[..., :-1]) ** 2 223 | 224 | mel_spec = self.mel_filters @ magnitudes 225 | 226 | log_spec = np.log10(np.clip(mel_spec, a_min=1e-10, a_max=None)) 227 | log_spec = np.maximum(log_spec, log_spec.max() - 8.0) 228 | log_spec = (log_spec + 4.0) / 4.0 229 | 230 | return log_spec 231 | -------------------------------------------------------------------------------- /faster_whisper/tokenizer.py: -------------------------------------------------------------------------------- 1 | import string 2 | 3 | from functools import cached_property 4 | from typing import List, Optional, Tuple 5 | 6 | import tokenizers 7 | 8 | 9 | class Tokenizer: 10 | """Simple wrapper around a tokenizers.Tokenizer.""" 11 | 12 | def __init__( 13 | self, 14 | tokenizer: tokenizers.Tokenizer, 15 | multilingual: bool, 16 | task: Optional[str] = None, 17 | language: Optional[str] = None, 18 | ): 19 | self.tokenizer = tokenizer 20 | 21 | if multilingual: 22 | if task not in _TASKS: 23 | raise ValueError( 24 | "'%s' is not a valid task (accepted tasks: %s)" 25 | % (task, ", ".join(_TASKS)) 26 | ) 27 | 28 | if language not in _LANGUAGE_CODES: 29 | raise ValueError( 30 | "'%s' is not a valid language code (accepted language codes: %s)" 31 | % (language, ", ".join(_LANGUAGE_CODES)) 32 | ) 33 | 34 | self.task = self.tokenizer.token_to_id("<|%s|>" % task) 35 | self.language = self.tokenizer.token_to_id("<|%s|>" % language) 36 | self.language_code = language 37 | else: 38 | self.task = None 39 | self.language = None 40 | self.language_code = "en" 41 | 42 | @cached_property 43 | def transcribe(self) -> int: 44 | return self.tokenizer.token_to_id("<|transcribe|>") 45 | 46 | @cached_property 47 | def translate(self) -> int: 48 | return self.tokenizer.token_to_id("<|translate|>") 49 | 50 | @cached_property 51 | def sot(self) -> int: 52 | return self.tokenizer.token_to_id("<|startoftranscript|>") 53 | 54 | @cached_property 55 | def sot_lm(self) -> int: 56 | return self.tokenizer.token_to_id("<|startoflm|>") 57 | 58 | @cached_property 59 | def sot_prev(self) -> int: 60 | return self.tokenizer.token_to_id("<|startofprev|>") 61 | 62 | @cached_property 63 | def eot(self) -> int: 64 | return self.tokenizer.token_to_id("<|endoftext|>") 65 | 66 | @cached_property 67 | def no_timestamps(self) -> int: 68 | return self.tokenizer.token_to_id("<|notimestamps|>") 69 | 70 | @property 71 | def timestamp_begin(self) -> int: 72 | return self.no_timestamps + 1 73 | 74 | @property 75 | def sot_sequence(self) -> List[int]: 76 | sequence = [self.sot] 77 | 78 | if self.language is not None: 79 | sequence.append(self.language) 80 | 81 | if self.task is not None: 82 | sequence.append(self.task) 83 | 84 | return sequence 85 | 86 | def encode(self, text: str) -> List[int]: 87 | return self.tokenizer.encode(text, add_special_tokens=False).ids 88 | 89 | def decode(self, tokens: List[int]) -> str: 90 | text_tokens = [token for token in tokens if token < self.eot] 91 | return 
self.tokenizer.decode(text_tokens) 92 | 93 | def decode_with_timestamps(self, tokens: List[int]) -> str: 94 | outputs = [[]] 95 | 96 | for token in tokens: 97 | if token >= self.timestamp_begin: 98 | timestamp = f"<|{(token - self.timestamp_begin) * 0.02:.2f}|>" 99 | outputs.append(timestamp) 100 | outputs.append([]) 101 | else: 102 | outputs[-1].append(token) 103 | 104 | return "".join( 105 | [s if isinstance(s, str) else self.tokenizer.decode(s) for s in outputs] 106 | ) 107 | 108 | @cached_property 109 | def non_speech_tokens(self) -> Tuple[int]: 110 | """ 111 | Returns the list of tokens to suppress in order to avoid any speaker tags or non-speech 112 | annotations, to prevent sampling texts that are not actually spoken in the audio, e.g. 113 | 114 | - ♪♪♪ 115 | - ( SPEAKING FOREIGN LANGUAGE ) 116 | - [DAVID] Hey there, 117 | 118 | keeping basic punctuations like commas, periods, question marks, exclamation points, etc. 119 | """ 120 | symbols = list('"#()*+/:;<=>@[\\]^_`{|}~「」『』') 121 | symbols += ( 122 | "<< >> <<< >>> -- --- -( -[ (' (\" (( )) ((( ))) [[ ]] {{ }} ♪♪ ♪♪♪".split() 123 | ) 124 | 125 | # symbols that may be a single token or multiple tokens depending on the tokenizer. 126 | # In case they're multiple tokens, suppress the first token, which is safe because: 127 | # These are between U+2640 and U+267F miscellaneous symbols that are okay to suppress 128 | # in generations, and in the 3-byte UTF-8 representation they share the first two bytes. 129 | miscellaneous = set("♩♪♫♬♭♮♯") 130 | assert all(0x2640 <= ord(c) <= 0x267F for c in miscellaneous) 131 | 132 | # allow hyphens "-" and single quotes "'" between words, but not at the beginning of a word 133 | result = {self.encode(" -")[0], self.encode(" '")[0]} 134 | for symbol in symbols + list(miscellaneous): 135 | for tokens in [ 136 | self.encode(symbol), 137 | self.encode(" " + symbol), 138 | ]: 139 | if len(tokens) == 1 or symbol in miscellaneous: 140 | result.add(tokens[0]) 141 | 142 | return tuple(sorted(result)) 143 | 144 | def split_to_word_tokens( 145 | self, tokens: List[int] 146 | ) -> Tuple[List[str], List[List[int]]]: 147 | if self.language_code in {"zh", "ja", "th", "lo", "my", "yue"}: 148 | # These languages don't typically use spaces, so it is difficult to split words 149 | # without morpheme analysis. 
Here, we instead split words at any 150 | # position where the tokens are decoded as valid unicode points 151 | return self.split_tokens_on_unicode(tokens) 152 | 153 | return self.split_tokens_on_spaces(tokens) 154 | 155 | def split_tokens_on_unicode( 156 | self, tokens: List[int] 157 | ) -> Tuple[List[str], List[List[int]]]: 158 | decoded_full = self.decode_with_timestamps(tokens) 159 | replacement_char = "\ufffd" 160 | 161 | words = [] 162 | word_tokens = [] 163 | current_tokens = [] 164 | unicode_offset = 0 165 | 166 | for token in tokens: 167 | current_tokens.append(token) 168 | decoded = self.decode_with_timestamps(current_tokens) 169 | 170 | try: 171 | replacement_char_index = decoded.index(replacement_char) 172 | replacement_char_index += unicode_offset 173 | except ValueError: 174 | replacement_char_index = None 175 | 176 | if replacement_char_index is None or ( 177 | replacement_char_index < len(decoded_full) 178 | and decoded_full[replacement_char_index] == replacement_char 179 | ): 180 | words.append(decoded) 181 | word_tokens.append(current_tokens) 182 | current_tokens = [] 183 | unicode_offset += len(decoded) 184 | 185 | return words, word_tokens 186 | 187 | def split_tokens_on_spaces( 188 | self, tokens: List[int] 189 | ) -> Tuple[List[str], List[List[int]]]: 190 | subwords, subword_tokens_list = self.split_tokens_on_unicode(tokens) 191 | words = [] 192 | word_tokens = [] 193 | 194 | for subword, subword_tokens in zip(subwords, subword_tokens_list): 195 | special = subword_tokens[0] >= self.eot 196 | with_space = subword.startswith(" ") 197 | punctuation = subword.strip() in string.punctuation 198 | if special or with_space or punctuation or len(words) == 0: 199 | words.append(subword) 200 | word_tokens.append(subword_tokens) 201 | else: 202 | words[-1] = words[-1] + subword 203 | word_tokens[-1].extend(subword_tokens) 204 | 205 | return words, word_tokens 206 | 207 | 208 | _TASKS = ( 209 | "transcribe", 210 | "translate", 211 | ) 212 | 213 | _LANGUAGE_CODES = ( 214 | "af", 215 | "am", 216 | "ar", 217 | "as", 218 | "az", 219 | "ba", 220 | "be", 221 | "bg", 222 | "bn", 223 | "bo", 224 | "br", 225 | "bs", 226 | "ca", 227 | "cs", 228 | "cy", 229 | "da", 230 | "de", 231 | "el", 232 | "en", 233 | "es", 234 | "et", 235 | "eu", 236 | "fa", 237 | "fi", 238 | "fo", 239 | "fr", 240 | "gl", 241 | "gu", 242 | "ha", 243 | "haw", 244 | "he", 245 | "hi", 246 | "hr", 247 | "ht", 248 | "hu", 249 | "hy", 250 | "id", 251 | "is", 252 | "it", 253 | "ja", 254 | "jw", 255 | "ka", 256 | "kk", 257 | "km", 258 | "kn", 259 | "ko", 260 | "la", 261 | "lb", 262 | "ln", 263 | "lo", 264 | "lt", 265 | "lv", 266 | "mg", 267 | "mi", 268 | "mk", 269 | "ml", 270 | "mn", 271 | "mr", 272 | "ms", 273 | "mt", 274 | "my", 275 | "ne", 276 | "nl", 277 | "nn", 278 | "no", 279 | "oc", 280 | "pa", 281 | "pl", 282 | "ps", 283 | "pt", 284 | "ro", 285 | "ru", 286 | "sa", 287 | "sd", 288 | "si", 289 | "sk", 290 | "sl", 291 | "sn", 292 | "so", 293 | "sq", 294 | "sr", 295 | "su", 296 | "sv", 297 | "sw", 298 | "ta", 299 | "te", 300 | "tg", 301 | "th", 302 | "tk", 303 | "tl", 304 | "tr", 305 | "tt", 306 | "uk", 307 | "ur", 308 | "uz", 309 | "vi", 310 | "yi", 311 | "yo", 312 | "zh", 313 | "yue", 314 | ) 315 | -------------------------------------------------------------------------------- /faster_whisper/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import re 4 | 5 | from typing import List, Optional, Union 6 | 7 | import huggingface_hub 8 | import 
requests 9 | 10 | from tqdm.auto import tqdm 11 | 12 | _MODELS = { 13 | "tiny.en": "Systran/faster-whisper-tiny.en", 14 | "tiny": "Systran/faster-whisper-tiny", 15 | "base.en": "Systran/faster-whisper-base.en", 16 | "base": "Systran/faster-whisper-base", 17 | "small.en": "Systran/faster-whisper-small.en", 18 | "small": "Systran/faster-whisper-small", 19 | "medium.en": "Systran/faster-whisper-medium.en", 20 | "medium": "Systran/faster-whisper-medium", 21 | "large-v1": "Systran/faster-whisper-large-v1", 22 | "large-v2": "Systran/faster-whisper-large-v2", 23 | "large-v3": "Systran/faster-whisper-large-v3", 24 | "large": "Systran/faster-whisper-large-v3", 25 | "distil-large-v2": "Systran/faster-distil-whisper-large-v2", 26 | "distil-medium.en": "Systran/faster-distil-whisper-medium.en", 27 | "distil-small.en": "Systran/faster-distil-whisper-small.en", 28 | "distil-large-v3": "Systran/faster-distil-whisper-large-v3", 29 | "distil-large-v3.5": "distil-whisper/distil-large-v3.5-ct2", 30 | "large-v3-turbo": "mobiuslabsgmbh/faster-whisper-large-v3-turbo", 31 | "turbo": "mobiuslabsgmbh/faster-whisper-large-v3-turbo", 32 | } 33 | 34 | 35 | def available_models() -> List[str]: 36 | """Returns the names of available models.""" 37 | return list(_MODELS.keys()) 38 | 39 | 40 | def get_assets_path(): 41 | """Returns the path to the assets directory.""" 42 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") 43 | 44 | 45 | def get_logger(): 46 | """Returns the module logger.""" 47 | return logging.getLogger("faster_whisper") 48 | 49 | 50 | def download_model( 51 | size_or_id: str, 52 | output_dir: Optional[str] = None, 53 | local_files_only: bool = False, 54 | cache_dir: Optional[str] = None, 55 | revision: Optional[str] = None, 56 | use_auth_token: Optional[Union[str, bool]] = None, 57 | ): 58 | """Downloads a CTranslate2 Whisper model from the Hugging Face Hub. 59 | 60 | Args: 61 | size_or_id: Size of the model to download from https://huggingface.co/Systran 62 | (tiny, tiny.en, base, base.en, small, small.en, distil-small.en, medium, medium.en, 63 | distil-medium.en, large-v1, large-v2, large-v3, large, distil-large-v2, 64 | distil-large-v3), or a CTranslate2-converted model ID from the Hugging Face Hub 65 | (e.g. Systran/faster-whisper-large-v3). 66 | output_dir: Directory where the model should be saved. If not set, the model is saved in 67 | the cache directory. 68 | local_files_only: If True, avoid downloading the file and return the path to the local 69 | cached file if it exists. 70 | cache_dir: Path to the folder where cached files are stored. 71 | revision: An optional Git revision id which can be a branch name, a tag, or a 72 | commit hash. 73 | use_auth_token: HuggingFace authentication token or True to use the 74 | token stored by the HuggingFace config folder. 75 | 76 | Returns: 77 | The path to the downloaded model. 78 | 79 | Raises: 80 | ValueError: if the model size is invalid. 
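        Example:
            An illustrative call only: the output directory name below is a placeholder,
            and network access is needed unless the files are already in the local cache.

                from faster_whisper import download_model

                # Fetch the converted "tiny" model and keep its files in a local directory.
                model_dir = download_model("tiny", output_dir="whisper-tiny-ct2")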
81 | """ 82 | if re.match(r".*/.*", size_or_id): 83 | repo_id = size_or_id 84 | else: 85 | repo_id = _MODELS.get(size_or_id) 86 | if repo_id is None: 87 | raise ValueError( 88 | "Invalid model size '%s', expected one of: %s" 89 | % (size_or_id, ", ".join(_MODELS.keys())) 90 | ) 91 | 92 | allow_patterns = [ 93 | "config.json", 94 | "preprocessor_config.json", 95 | "model.bin", 96 | "tokenizer.json", 97 | "vocabulary.*", 98 | ] 99 | 100 | kwargs = { 101 | "local_files_only": local_files_only, 102 | "allow_patterns": allow_patterns, 103 | "tqdm_class": disabled_tqdm, 104 | "revision": revision, 105 | } 106 | 107 | if output_dir is not None: 108 | kwargs["local_dir"] = output_dir 109 | kwargs["local_dir_use_symlinks"] = False 110 | 111 | if cache_dir is not None: 112 | kwargs["cache_dir"] = cache_dir 113 | 114 | if use_auth_token is not None: 115 | kwargs["token"] = use_auth_token 116 | 117 | try: 118 | return huggingface_hub.snapshot_download(repo_id, **kwargs) 119 | except ( 120 | huggingface_hub.utils.HfHubHTTPError, 121 | requests.exceptions.ConnectionError, 122 | ) as exception: 123 | logger = get_logger() 124 | logger.warning( 125 | "An error occured while synchronizing the model %s from the Hugging Face Hub:\n%s", 126 | repo_id, 127 | exception, 128 | ) 129 | logger.warning( 130 | "Trying to load the model directly from the local cache, if it exists." 131 | ) 132 | 133 | kwargs["local_files_only"] = True 134 | return huggingface_hub.snapshot_download(repo_id, **kwargs) 135 | 136 | 137 | def format_timestamp( 138 | seconds: float, 139 | always_include_hours: bool = False, 140 | decimal_marker: str = ".", 141 | ) -> str: 142 | assert seconds >= 0, "non-negative timestamp expected" 143 | milliseconds = round(seconds * 1000.0) 144 | 145 | hours = milliseconds // 3_600_000 146 | milliseconds -= hours * 3_600_000 147 | 148 | minutes = milliseconds // 60_000 149 | milliseconds -= minutes * 60_000 150 | 151 | seconds = milliseconds // 1_000 152 | milliseconds -= seconds * 1_000 153 | 154 | hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else "" 155 | return ( 156 | f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}" 157 | ) 158 | 159 | 160 | class disabled_tqdm(tqdm): 161 | def __init__(self, *args, **kwargs): 162 | kwargs["disable"] = True 163 | super().__init__(*args, **kwargs) 164 | 165 | 166 | def get_end(segments: List[dict]) -> Optional[float]: 167 | return next( 168 | (w["end"] for s in reversed(segments) for w in reversed(s["words"])), 169 | segments[-1]["end"] if segments else None, 170 | ) 171 | -------------------------------------------------------------------------------- /faster_whisper/vad.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | import functools 3 | import os 4 | 5 | from dataclasses import dataclass 6 | from typing import Dict, List, Optional, Tuple 7 | 8 | import numpy as np 9 | 10 | from faster_whisper.utils import get_assets_path 11 | 12 | 13 | # The code below is adapted from https://github.com/snakers4/silero-vad. 14 | @dataclass 15 | class VadOptions: 16 | """VAD options. 17 | 18 | Attributes: 19 | threshold: Speech threshold. Silero VAD outputs speech probabilities for each audio chunk, 20 | probabilities ABOVE this value are considered as SPEECH. It is better to tune this 21 | parameter for each dataset separately, but "lazy" 0.5 is pretty good for most datasets. 22 | neg_threshold: Silence threshold for determining the end of speech. 
If a probability is lower 23 | than neg_threshold, it is always considered silence. Values higher than neg_threshold 24 | are only considered speech if the previous sample was classified as speech; otherwise, 25 | they are treated as silence. This parameter helps refine the detection of speech 26 | transitions, ensuring smoother segment boundaries. 27 | min_speech_duration_ms: Final speech chunks shorter than min_speech_duration_ms are thrown out. 28 | max_speech_duration_s: Maximum duration of speech chunks in seconds. Chunks longer 29 | than max_speech_duration_s will be split at the timestamp of the last silence that 30 | lasts more than 100ms (if any), to prevent aggressive cutting. Otherwise, they will be 31 | split aggressively just before max_speech_duration_s. 32 | min_silence_duration_ms: At the end of each speech chunk, wait for min_silence_duration_ms 33 | before separating it. 34 | speech_pad_ms: Final speech chunks are padded by speech_pad_ms on each side. 35 | """ 36 | 37 | threshold: float = 0.5 38 | neg_threshold: Optional[float] = None 39 | min_speech_duration_ms: int = 0 40 | max_speech_duration_s: float = float("inf") 41 | min_silence_duration_ms: int = 2000 42 | speech_pad_ms: int = 400 43 | 44 | 45 | def get_speech_timestamps( 46 | audio: np.ndarray, 47 | vad_options: Optional[VadOptions] = None, 48 | sampling_rate: int = 16000, 49 | **kwargs, 50 | ) -> List[dict]: 51 | """This method is used for splitting long audio into speech chunks using Silero VAD. 52 | 53 | Args: 54 | audio: One dimensional float array. 55 | vad_options: Options for VAD processing. 56 | sampling_rate: Sampling rate of the audio. 57 | kwargs: VAD options passed as keyword arguments for backward compatibility. 58 | 59 | Returns: 60 | List of dicts containing begin and end samples of each speech chunk. 
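        Example:
            An illustrative sketch; it assumes a one-dimensional float32 waveform already
            resampled to 16 kHz (the zero-filled buffer below is only a placeholder).

                import numpy as np

                from faster_whisper.vad import VadOptions, get_speech_timestamps

                audio = np.zeros(16000, dtype=np.float32)  # placeholder; use real audio here
                chunks = get_speech_timestamps(
                    audio, VadOptions(min_silence_duration_ms=500, speech_pad_ms=200)
                )
                # Each entry is a dict with "start" and "end" expressed in samples.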
61 | """ 62 | if vad_options is None: 63 | vad_options = VadOptions(**kwargs) 64 | 65 | threshold = vad_options.threshold 66 | neg_threshold = vad_options.neg_threshold 67 | min_speech_duration_ms = vad_options.min_speech_duration_ms 68 | max_speech_duration_s = vad_options.max_speech_duration_s 69 | min_silence_duration_ms = vad_options.min_silence_duration_ms 70 | window_size_samples = 512 71 | speech_pad_ms = vad_options.speech_pad_ms 72 | min_speech_samples = sampling_rate * min_speech_duration_ms / 1000 73 | speech_pad_samples = sampling_rate * speech_pad_ms / 1000 74 | max_speech_samples = ( 75 | sampling_rate * max_speech_duration_s 76 | - window_size_samples 77 | - 2 * speech_pad_samples 78 | ) 79 | min_silence_samples = sampling_rate * min_silence_duration_ms / 1000 80 | min_silence_samples_at_max_speech = sampling_rate * 98 / 1000 81 | 82 | audio_length_samples = len(audio) 83 | 84 | model = get_vad_model() 85 | 86 | padded_audio = np.pad( 87 | audio, (0, window_size_samples - audio.shape[0] % window_size_samples) 88 | ) 89 | speech_probs = model(padded_audio.reshape(1, -1)).squeeze(0) 90 | 91 | triggered = False 92 | speeches = [] 93 | current_speech = {} 94 | if neg_threshold is None: 95 | neg_threshold = max(threshold - 0.15, 0.01) 96 | 97 | # to save potential segment end (and tolerate some silence) 98 | temp_end = 0 99 | # to save potential segment limits in case of maximum segment size reached 100 | prev_end = next_start = 0 101 | 102 | for i, speech_prob in enumerate(speech_probs): 103 | if (speech_prob >= threshold) and temp_end: 104 | temp_end = 0 105 | if next_start < prev_end: 106 | next_start = window_size_samples * i 107 | 108 | if (speech_prob >= threshold) and not triggered: 109 | triggered = True 110 | current_speech["start"] = window_size_samples * i 111 | continue 112 | 113 | if ( 114 | triggered 115 | and (window_size_samples * i) - current_speech["start"] > max_speech_samples 116 | ): 117 | if prev_end: 118 | current_speech["end"] = prev_end 119 | speeches.append(current_speech) 120 | current_speech = {} 121 | # previously reached silence (< neg_thres) and is still not speech (< thres) 122 | if next_start < prev_end: 123 | triggered = False 124 | else: 125 | current_speech["start"] = next_start 126 | prev_end = next_start = temp_end = 0 127 | else: 128 | current_speech["end"] = window_size_samples * i 129 | speeches.append(current_speech) 130 | current_speech = {} 131 | prev_end = next_start = temp_end = 0 132 | triggered = False 133 | continue 134 | 135 | if (speech_prob < neg_threshold) and triggered: 136 | if not temp_end: 137 | temp_end = window_size_samples * i 138 | # condition to avoid cutting in very short silence 139 | if (window_size_samples * i) - temp_end > min_silence_samples_at_max_speech: 140 | prev_end = temp_end 141 | if (window_size_samples * i) - temp_end < min_silence_samples: 142 | continue 143 | else: 144 | current_speech["end"] = temp_end 145 | if ( 146 | current_speech["end"] - current_speech["start"] 147 | ) > min_speech_samples: 148 | speeches.append(current_speech) 149 | current_speech = {} 150 | prev_end = next_start = temp_end = 0 151 | triggered = False 152 | continue 153 | 154 | if ( 155 | current_speech 156 | and (audio_length_samples - current_speech["start"]) > min_speech_samples 157 | ): 158 | current_speech["end"] = audio_length_samples 159 | speeches.append(current_speech) 160 | 161 | for i, speech in enumerate(speeches): 162 | if i == 0: 163 | speech["start"] = int(max(0, speech["start"] - speech_pad_samples)) 164 | if i 
!= len(speeches) - 1: 165 | silence_duration = speeches[i + 1]["start"] - speech["end"] 166 | if silence_duration < 2 * speech_pad_samples: 167 | speech["end"] += int(silence_duration // 2) 168 | speeches[i + 1]["start"] = int( 169 | max(0, speeches[i + 1]["start"] - silence_duration // 2) 170 | ) 171 | else: 172 | speech["end"] = int( 173 | min(audio_length_samples, speech["end"] + speech_pad_samples) 174 | ) 175 | speeches[i + 1]["start"] = int( 176 | max(0, speeches[i + 1]["start"] - speech_pad_samples) 177 | ) 178 | else: 179 | speech["end"] = int( 180 | min(audio_length_samples, speech["end"] + speech_pad_samples) 181 | ) 182 | 183 | return speeches 184 | 185 | 186 | def collect_chunks( 187 | audio: np.ndarray, chunks: List[dict], sampling_rate: int = 16000 188 | ) -> Tuple[List[np.ndarray], List[Dict[str, int]]]: 189 | """Collects audio chunks.""" 190 | if not chunks: 191 | chunk_metadata = { 192 | "start_time": 0, 193 | "end_time": 0, 194 | } 195 | return [np.array([], dtype=np.float32)], [chunk_metadata] 196 | 197 | audio_chunks = [] 198 | chunks_metadata = [] 199 | for chunk in chunks: 200 | chunk_metadata = { 201 | "start_time": chunk["start"] / sampling_rate, 202 | "end_time": chunk["end"] / sampling_rate, 203 | } 204 | audio_chunks.append(audio[chunk["start"] : chunk["end"]]) 205 | chunks_metadata.append(chunk_metadata) 206 | return audio_chunks, chunks_metadata 207 | 208 | 209 | class SpeechTimestampsMap: 210 | """Helper class to restore original speech timestamps.""" 211 | 212 | def __init__(self, chunks: List[dict], sampling_rate: int, time_precision: int = 2): 213 | self.sampling_rate = sampling_rate 214 | self.time_precision = time_precision 215 | self.chunk_end_sample = [] 216 | self.total_silence_before = [] 217 | 218 | previous_end = 0 219 | silent_samples = 0 220 | 221 | for chunk in chunks: 222 | silent_samples += chunk["start"] - previous_end 223 | previous_end = chunk["end"] 224 | 225 | self.chunk_end_sample.append(chunk["end"] - silent_samples) 226 | self.total_silence_before.append(silent_samples / sampling_rate) 227 | 228 | def get_original_time( 229 | self, 230 | time: float, 231 | chunk_index: Optional[int] = None, 232 | ) -> float: 233 | if chunk_index is None: 234 | chunk_index = self.get_chunk_index(time) 235 | 236 | total_silence_before = self.total_silence_before[chunk_index] 237 | return round(total_silence_before + time, self.time_precision) 238 | 239 | def get_chunk_index(self, time: float) -> int: 240 | sample = int(time * self.sampling_rate) 241 | return min( 242 | bisect.bisect(self.chunk_end_sample, sample), 243 | len(self.chunk_end_sample) - 1, 244 | ) 245 | 246 | 247 | @functools.lru_cache 248 | def get_vad_model(): 249 | """Returns the VAD model instance.""" 250 | encoder_path = os.path.join(get_assets_path(), "silero_encoder_v5.onnx") 251 | decoder_path = os.path.join(get_assets_path(), "silero_decoder_v5.onnx") 252 | return SileroVADModel(encoder_path, decoder_path) 253 | 254 | 255 | class SileroVADModel: 256 | def __init__(self, encoder_path, decoder_path): 257 | try: 258 | import onnxruntime 259 | except ImportError as e: 260 | raise RuntimeError( 261 | "Applying the VAD filter requires the onnxruntime package" 262 | ) from e 263 | 264 | opts = onnxruntime.SessionOptions() 265 | opts.inter_op_num_threads = 1 266 | opts.intra_op_num_threads = 1 267 | opts.enable_cpu_mem_arena = False 268 | opts.log_severity_level = 4 269 | 270 | self.encoder_session = onnxruntime.InferenceSession( 271 | encoder_path, 272 | providers=["CPUExecutionProvider"], 
273 | sess_options=opts, 274 | ) 275 | self.decoder_session = onnxruntime.InferenceSession( 276 | decoder_path, 277 | providers=["CPUExecutionProvider"], 278 | sess_options=opts, 279 | ) 280 | 281 | def __call__( 282 | self, audio: np.ndarray, num_samples: int = 512, context_size_samples: int = 64 283 | ): 284 | assert ( 285 | audio.ndim == 2 286 | ), "Input should be a 2D array with size (batch_size, num_samples)" 287 | assert ( 288 | audio.shape[1] % num_samples == 0 289 | ), "Input size should be a multiple of num_samples" 290 | 291 | batch_size = audio.shape[0] 292 | 293 | state = np.zeros((2, batch_size, 128), dtype="float32") 294 | context = np.zeros( 295 | (batch_size, context_size_samples), 296 | dtype="float32", 297 | ) 298 | 299 | batched_audio = audio.reshape(batch_size, -1, num_samples) 300 | context = batched_audio[..., -context_size_samples:] 301 | context[:, -1] = 0 302 | context = np.roll(context, 1, 1) 303 | batched_audio = np.concatenate([context, batched_audio], 2) 304 | 305 | batched_audio = batched_audio.reshape(-1, num_samples + context_size_samples) 306 | 307 | encoder_batch_size = 10000 308 | num_segments = batched_audio.shape[0] 309 | encoder_outputs = [] 310 | for i in range(0, num_segments, encoder_batch_size): 311 | encoder_output = self.encoder_session.run( 312 | None, {"input": batched_audio[i : i + encoder_batch_size]} 313 | )[0] 314 | encoder_outputs.append(encoder_output) 315 | 316 | encoder_output = np.concatenate(encoder_outputs, axis=0) 317 | encoder_output = encoder_output.reshape(batch_size, -1, 128) 318 | 319 | decoder_outputs = [] 320 | for window in np.split(encoder_output, encoder_output.shape[1], axis=1): 321 | out, state = self.decoder_session.run( 322 | None, {"input": window.squeeze(1), "state": state} 323 | ) 324 | decoder_outputs.append(out) 325 | 326 | out = np.stack(decoder_outputs, axis=1).squeeze(-1) 327 | return out 328 | 329 | 330 | def merge_segments(segments_list, vad_options: VadOptions, sampling_rate: int = 16000): 331 | if not segments_list: 332 | return [] 333 | 334 | curr_end = 0 335 | seg_idxs = [] 336 | merged_segments = [] 337 | edge_padding = vad_options.speech_pad_ms * sampling_rate // 1000 338 | chunk_length = vad_options.max_speech_duration_s * sampling_rate 339 | 340 | curr_start = segments_list[0]["start"] 341 | 342 | for idx, seg in enumerate(segments_list): 343 | # if any segment start timing is less than previous segment end timing, 344 | # reset the edge padding. Similarly for end timing. 
345 | if idx > 0: 346 | if seg["start"] < segments_list[idx - 1]["end"]: 347 | seg["start"] += edge_padding 348 | if idx < len(segments_list) - 1: 349 | if seg["end"] > segments_list[idx + 1]["start"]: 350 | seg["end"] -= edge_padding 351 | 352 | if seg["end"] - curr_start > chunk_length and curr_end - curr_start > 0: 353 | merged_segments.append( 354 | { 355 | "start": curr_start, 356 | "end": curr_end, 357 | "segments": seg_idxs, 358 | } 359 | ) 360 | curr_start = seg["start"] 361 | seg_idxs = [] 362 | curr_end = seg["end"] 363 | seg_idxs.append((seg["start"], seg["end"])) 364 | # add final 365 | merged_segments.append( 366 | { 367 | "start": curr_start, 368 | "end": curr_end, 369 | "segments": seg_idxs, 370 | } 371 | ) 372 | return merged_segments 373 | -------------------------------------------------------------------------------- /faster_whisper/version.py: -------------------------------------------------------------------------------- 1 | """Version information.""" 2 | 3 | __version__ = "1.1.1" 4 | -------------------------------------------------------------------------------- /requirements.conversion.txt: -------------------------------------------------------------------------------- 1 | transformers[torch]>=4.23 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ctranslate2>=4.0,<5 2 | huggingface_hub>=0.13 3 | tokenizers>=0.13,<1 4 | onnxruntime>=1.14,<2 5 | av>=11 6 | tqdm -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | ignore = 4 | E203, 5 | W503, 6 | 7 | [isort] 8 | profile=black 9 | lines_between_types=1 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import find_packages, setup 4 | 5 | base_dir = os.path.dirname(os.path.abspath(__file__)) 6 | 7 | 8 | def get_long_description(): 9 | readme_path = os.path.join(base_dir, "README.md") 10 | with open(readme_path, encoding="utf-8") as readme_file: 11 | return readme_file.read() 12 | 13 | 14 | def get_project_version(): 15 | version_path = os.path.join(base_dir, "faster_whisper", "version.py") 16 | version = {} 17 | with open(version_path, encoding="utf-8") as fp: 18 | exec(fp.read(), version) 19 | return version["__version__"] 20 | 21 | 22 | def get_requirements(path): 23 | with open(path, encoding="utf-8") as requirements: 24 | return [requirement.strip() for requirement in requirements] 25 | 26 | 27 | install_requires = get_requirements(os.path.join(base_dir, "requirements.txt")) 28 | conversion_requires = get_requirements( 29 | os.path.join(base_dir, "requirements.conversion.txt") 30 | ) 31 | 32 | setup( 33 | name="faster-whisper", 34 | version=get_project_version(), 35 | license="MIT", 36 | description="Faster Whisper transcription with CTranslate2", 37 | long_description=get_long_description(), 38 | long_description_content_type="text/markdown", 39 | author="Guillaume Klein", 40 | url="https://github.com/SYSTRAN/faster-whisper", 41 | classifiers=[ 42 | "Development Status :: 4 - Beta", 43 | "Intended Audience :: Developers", 44 | "Intended Audience :: Science/Research", 45 | "License :: OSI Approved :: MIT License", 46 | "Programming Language :: Python :: 
3", 47 | "Programming Language :: Python :: 3 :: Only", 48 | "Programming Language :: Python :: 3.9", 49 | "Programming Language :: Python :: 3.10", 50 | "Programming Language :: Python :: 3.11", 51 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 52 | ], 53 | keywords="openai whisper speech ctranslate2 inference quantization transformer", 54 | python_requires=">=3.9", 55 | install_requires=install_requires, 56 | extras_require={ 57 | "conversion": conversion_requires, 58 | "dev": [ 59 | "black==23.*", 60 | "flake8==6.*", 61 | "isort==5.*", 62 | "pytest==7.*", 63 | ], 64 | }, 65 | packages=find_packages(), 66 | include_package_data=True, 67 | ) 68 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | 6 | @pytest.fixture 7 | def data_dir(): 8 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") 9 | 10 | 11 | @pytest.fixture 12 | def jfk_path(data_dir): 13 | return os.path.join(data_dir, "jfk.flac") 14 | 15 | 16 | @pytest.fixture 17 | def physcisworks_path(data_dir): 18 | return os.path.join(data_dir, "physicsworks.wav") 19 | -------------------------------------------------------------------------------- /tests/data/hotwords.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/hotwords.mp3 -------------------------------------------------------------------------------- /tests/data/jfk.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/jfk.flac -------------------------------------------------------------------------------- /tests/data/multilingual.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/multilingual.mp3 -------------------------------------------------------------------------------- /tests/data/physicsworks.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/physicsworks.wav -------------------------------------------------------------------------------- /tests/data/stereo_diarization.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/stereo_diarization.wav -------------------------------------------------------------------------------- /tests/test_tokenizer.py: -------------------------------------------------------------------------------- 1 | from faster_whisper import WhisperModel 2 | from faster_whisper.tokenizer import Tokenizer 3 | from faster_whisper.transcribe import get_suppressed_tokens 4 | 5 | 6 | def test_suppressed_tokens_minus_1(): 7 | model = WhisperModel("tiny.en") 8 | 9 | tokenizer = Tokenizer(model.hf_tokenizer, False) 10 | tokens = get_suppressed_tokens(tokenizer, [-1]) 11 | assert tokens == ( 12 | 1, 13 | 2, 14 | 7, 15 | 8, 16 | 9, 17 | 10, 18 | 14, 19 | 25, 20 | 26, 21 | 27, 22 | 28, 23 | 29, 24 | 31, 25 | 58, 26 | 59, 27 | 60, 28 | 61, 29 | 62, 30 
| 63, 31 | 90, 32 | 91, 33 | 92, 34 | 93, 35 | 357, 36 | 366, 37 | 438, 38 | 532, 39 | 685, 40 | 705, 41 | 796, 42 | 930, 43 | 1058, 44 | 1220, 45 | 1267, 46 | 1279, 47 | 1303, 48 | 1343, 49 | 1377, 50 | 1391, 51 | 1635, 52 | 1782, 53 | 1875, 54 | 2162, 55 | 2361, 56 | 2488, 57 | 3467, 58 | 4008, 59 | 4211, 60 | 4600, 61 | 4808, 62 | 5299, 63 | 5855, 64 | 6329, 65 | 7203, 66 | 9609, 67 | 9959, 68 | 10563, 69 | 10786, 70 | 11420, 71 | 11709, 72 | 11907, 73 | 13163, 74 | 13697, 75 | 13700, 76 | 14808, 77 | 15306, 78 | 16410, 79 | 16791, 80 | 17992, 81 | 19203, 82 | 19510, 83 | 20724, 84 | 22305, 85 | 22935, 86 | 27007, 87 | 30109, 88 | 30420, 89 | 33409, 90 | 34949, 91 | 40283, 92 | 40493, 93 | 40549, 94 | 47282, 95 | 49146, 96 | 50257, 97 | 50357, 98 | 50358, 99 | 50359, 100 | 50360, 101 | ) 102 | 103 | 104 | def test_suppressed_tokens_minus_value(): 105 | model = WhisperModel("tiny.en") 106 | 107 | tokenizer = Tokenizer(model.hf_tokenizer, False) 108 | tokens = get_suppressed_tokens(tokenizer, [13]) 109 | assert tokens == (13, 50257, 50357, 50358, 50359, 50360) 110 | 111 | 112 | def test_split_on_unicode(): 113 | model = WhisperModel("tiny") 114 | tokenizer = Tokenizer(model.hf_tokenizer, False) 115 | 116 | tokens = [8404, 871, 287, 6, 246, 526, 3210, 20378] 117 | words, word_tokens = tokenizer.split_tokens_on_unicode(tokens) 118 | 119 | assert words == [" elle", " est", " l", "'", "\ufffd", "é", "rit", "oire"] 120 | assert word_tokens == [[8404], [871], [287], [6], [246], [526], [3210], [20378]] 121 | -------------------------------------------------------------------------------- /tests/test_transcribe.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | 4 | import numpy as np 5 | 6 | from faster_whisper import BatchedInferencePipeline, WhisperModel, decode_audio 7 | 8 | 9 | def test_supported_languages(): 10 | model = WhisperModel("tiny.en") 11 | assert model.supported_languages == ["en"] 12 | 13 | 14 | def test_transcribe(jfk_path): 15 | model = WhisperModel("tiny") 16 | segments, info = model.transcribe(jfk_path, word_timestamps=True) 17 | assert info.all_language_probs is not None 18 | 19 | assert info.language == "en" 20 | assert info.language_probability > 0.9 21 | assert info.duration == 11 22 | 23 | # Get top language info from all results, which should match the 24 | # already existing metadata 25 | top_lang, top_lang_score = info.all_language_probs[0] 26 | assert info.language == top_lang 27 | assert abs(info.language_probability - top_lang_score) < 1e-16 28 | 29 | segments = list(segments) 30 | 31 | assert len(segments) == 1 32 | 33 | segment = segments[0] 34 | 35 | assert segment.text == ( 36 | " And so my fellow Americans, ask not what your country can do for you, " 37 | "ask what you can do for your country." 
38 | ) 39 | 40 | assert segment.text == "".join(word.word for word in segment.words) 41 | assert segment.start == segment.words[0].start 42 | assert segment.end == segment.words[-1].end 43 | batched_model = BatchedInferencePipeline(model=model) 44 | result, info = batched_model.transcribe( 45 | jfk_path, word_timestamps=True, vad_filter=False 46 | ) 47 | assert info.language == "en" 48 | assert info.language_probability > 0.7 49 | segments = [] 50 | for segment in result: 51 | segments.append( 52 | {"start": segment.start, "end": segment.end, "text": segment.text} 53 | ) 54 | 55 | assert len(segments) == 1 56 | assert segment.text == ( 57 | " And so my fellow Americans ask not what your country can do for you, " 58 | "ask what you can do for your country." 59 | ) 60 | 61 | 62 | def test_batched_transcribe(physcisworks_path): 63 | model = WhisperModel("tiny") 64 | batched_model = BatchedInferencePipeline(model=model) 65 | result, info = batched_model.transcribe(physcisworks_path, batch_size=16) 66 | assert info.language == "en" 67 | assert info.language_probability > 0.7 68 | segments = [] 69 | for segment in result: 70 | segments.append( 71 | {"start": segment.start, "end": segment.end, "text": segment.text} 72 | ) 73 | # number of near 30 sec segments 74 | assert len(segments) == 7 75 | 76 | result, info = batched_model.transcribe( 77 | physcisworks_path, 78 | batch_size=16, 79 | without_timestamps=False, 80 | word_timestamps=True, 81 | ) 82 | segments = [] 83 | for segment in result: 84 | assert segment.words is not None 85 | segments.append( 86 | {"start": segment.start, "end": segment.end, "text": segment.text} 87 | ) 88 | assert len(segments) > 7 89 | 90 | 91 | def test_empty_audio(): 92 | audio = np.asarray([], dtype="float32") 93 | model = WhisperModel("tiny") 94 | pipeline = BatchedInferencePipeline(model=model) 95 | assert list(model.transcribe(audio)[0]) == [] 96 | assert list(pipeline.transcribe(audio)[0]) == [] 97 | model.detect_language(audio) 98 | 99 | 100 | def test_prefix_with_timestamps(jfk_path): 101 | model = WhisperModel("tiny") 102 | segments, _ = model.transcribe(jfk_path, prefix="And so my fellow Americans") 103 | segments = list(segments) 104 | 105 | assert len(segments) == 1 106 | 107 | segment = segments[0] 108 | 109 | assert segment.text == ( 110 | " And so my fellow Americans, ask not what your country can do for you, " 111 | "ask what you can do for your country." 112 | ) 113 | 114 | assert segment.start == 0 115 | assert 10 < segment.end <= 11 116 | 117 | 118 | def test_vad(jfk_path): 119 | model = WhisperModel("tiny") 120 | segments, info = model.transcribe( 121 | jfk_path, 122 | vad_filter=True, 123 | vad_parameters=dict(min_silence_duration_ms=500, speech_pad_ms=200), 124 | ) 125 | segments = list(segments) 126 | 127 | assert len(segments) == 1 128 | segment = segments[0] 129 | 130 | assert segment.text == ( 131 | " And so my fellow Americans ask not what your country can do for you, " 132 | "ask what you can do for your country." 
133 | ) 134 | 135 | assert 0 < segment.start < 1 136 | assert 10 < segment.end < 11 137 | 138 | assert info.vad_options.min_silence_duration_ms == 500 139 | assert info.vad_options.speech_pad_ms == 200 140 | 141 | 142 | def test_stereo_diarization(data_dir): 143 | model = WhisperModel("tiny") 144 | 145 | audio_path = os.path.join(data_dir, "stereo_diarization.wav") 146 | left, right = decode_audio(audio_path, split_stereo=True) 147 | 148 | segments, _ = model.transcribe(left) 149 | transcription = "".join(segment.text for segment in segments).strip() 150 | assert transcription == ( 151 | "He began a confused complaint against the wizard, " 152 | "who had vanished behind the curtain on the left." 153 | ) 154 | 155 | segments, _ = model.transcribe(right) 156 | transcription = "".join(segment.text for segment in segments).strip() 157 | assert transcription == "The horizon seems extremely distant." 158 | 159 | 160 | def test_multilingual_transcription(data_dir): 161 | model = WhisperModel("tiny") 162 | pipeline = BatchedInferencePipeline(model) 163 | 164 | audio_path = os.path.join(data_dir, "multilingual.mp3") 165 | audio = decode_audio(audio_path) 166 | 167 | segments, info = model.transcribe( 168 | audio, 169 | multilingual=True, 170 | without_timestamps=True, 171 | condition_on_previous_text=False, 172 | ) 173 | segments = list(segments) 174 | 175 | assert ( 176 | segments[0].text 177 | == " Permission is hereby granted, free of charge, to any person obtaining a copy of the" 178 | " software and associated documentation files to deal in the software without restriction," 179 | " including without limitation the rights to use, copy, modify, merge, publish, distribute" 180 | ", sublicence, and or cell copies of the software, and to permit persons to whom the " 181 | "software is furnished to do so, subject to the following conditions. The above copyright" 182 | " notice and this permission notice, shall be included in all copies or substantial " 183 | "portions of the software." 184 | ) 185 | 186 | assert ( 187 | segments[1].text 188 | == " Jedem, der dieses Software und die dazu gehöregen Dokumentationsdatein erhält, wird " 189 | "hiermit unengeltlich die Genehmigung erteilt, wird der Software und eingeschränkt zu " 190 | "verfahren. Dies umfasst insbesondere das Recht, die Software zu verwenden, zu " 191 | "vervielfältigen, zu modifizieren, zu Samenzofügen, zu veröffentlichen, zu verteilen, " 192 | "unterzulizenzieren und oder kopieren der Software zu verkaufen und diese Rechte " 193 | "unterfolgen den Bedingungen anderen zu übertragen." 194 | ) 195 | 196 | segments, info = pipeline.transcribe(audio, multilingual=True) 197 | segments = list(segments) 198 | 199 | assert ( 200 | segments[0].text 201 | == " Permission is hereby granted, free of charge, to any person obtaining a copy of the" 202 | " software and associated documentation files to deal in the software without restriction," 203 | " including without limitation the rights to use, copy, modify, merge, publish, distribute" 204 | ", sublicence, and or cell copies of the software, and to permit persons to whom the " 205 | "software is furnished to do so, subject to the following conditions. The above copyright" 206 | " notice and this permission notice, shall be included in all copies or substantial " 207 | "portions of the software." 208 | ) 209 | assert ( 210 | "Dokumentationsdatein erhält, wird hiermit unengeltlich die Genehmigung erteilt," 211 | " wird der Software und eingeschränkt zu verfahren. 
Dies umfasst insbesondere das Recht," 212 | " die Software zu verwenden, zu vervielfältigen, zu modifizieren" 213 | in segments[1].text 214 | ) 215 | 216 | 217 | def test_hotwords(data_dir): 218 | model = WhisperModel("tiny") 219 | pipeline = BatchedInferencePipeline(model) 220 | 221 | audio_path = os.path.join(data_dir, "hotwords.mp3") 222 | audio = decode_audio(audio_path) 223 | 224 | segments, info = model.transcribe(audio, hotwords="ComfyUI") 225 | segments = list(segments) 226 | 227 | assert "ComfyUI" in segments[0].text 228 | assert info.transcription_options.hotwords == "ComfyUI" 229 | 230 | segments, info = pipeline.transcribe(audio, hotwords="ComfyUI") 231 | segments = list(segments) 232 | 233 | assert "ComfyUI" in segments[0].text 234 | assert info.transcription_options.hotwords == "ComfyUI" 235 | 236 | 237 | def test_transcribe_signature(): 238 | model_transcribe_args = set(inspect.getargs(WhisperModel.transcribe.__code__).args) 239 | pipeline_transcribe_args = set( 240 | inspect.getargs(BatchedInferencePipeline.transcribe.__code__).args 241 | ) 242 | pipeline_transcribe_args.remove("batch_size") 243 | 244 | assert model_transcribe_args == pipeline_transcribe_args 245 | 246 | 247 | def test_monotonic_timestamps(physcisworks_path): 248 | model = WhisperModel("tiny") 249 | pipeline = BatchedInferencePipeline(model=model) 250 | 251 | segments, info = model.transcribe(physcisworks_path, word_timestamps=True) 252 | segments = list(segments) 253 | 254 | for i in range(len(segments) - 1): 255 | assert segments[i].start <= segments[i].end 256 | assert segments[i].end <= segments[i + 1].start 257 | for word in segments[i].words: 258 | assert word.start <= word.end 259 | assert word.end <= segments[i].end 260 | assert segments[-1].end <= info.duration 261 | 262 | segments, info = pipeline.transcribe(physcisworks_path, word_timestamps=True) 263 | segments = list(segments) 264 | 265 | for i in range(len(segments) - 1): 266 | assert segments[i].start <= segments[i].end 267 | assert segments[i].end <= segments[i + 1].start 268 | for word in segments[i].words: 269 | assert word.start <= word.end 270 | assert word.end <= segments[i].end 271 | assert segments[-1].end <= info.duration 272 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from faster_whisper import available_models, download_model 4 | 5 | 6 | def test_available_models(): 7 | models = available_models() 8 | assert isinstance(models, list) 9 | assert "tiny" in models 10 | 11 | 12 | def test_download_model(tmpdir): 13 | output_dir = str(tmpdir.join("model")) 14 | 15 | model_dir = download_model("tiny", output_dir=output_dir) 16 | 17 | assert model_dir == output_dir 18 | assert os.path.isdir(model_dir) 19 | assert not os.path.islink(model_dir) 20 | 21 | for filename in os.listdir(model_dir): 22 | path = os.path.join(model_dir, filename) 23 | assert not os.path.islink(path) 24 | 25 | 26 | def test_download_model_in_cache(tmpdir): 27 | cache_dir = str(tmpdir.join("model")) 28 | download_model("tiny", cache_dir=cache_dir) 29 | assert os.path.isdir(cache_dir) 30 | --------------------------------------------------------------------------------
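For quick reference, here is a minimal, illustrative sketch of the transcription API exercised by the tests above. It assumes the "tiny" model can be downloaded (or is already cached) and that an audio file such as tests/data/jfk.flac is available; the printed output format is arbitrary.

```python
from faster_whisper import WhisperModel

# Load a small CTranslate2-converted Whisper model
# (fetched from the Hugging Face Hub on first use, then cached locally).
model = WhisperModel("tiny")

# transcribe() returns an iterable of segments plus an info object carrying
# language metadata such as info.language and info.duration.
segments, info = model.transcribe("tests/data/jfk.flac", word_timestamps=True)

print(info.language, info.language_probability)
for segment in segments:
    print(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
```

The batched path shown in the tests wraps the same model: BatchedInferencePipeline(model=model) exposes the same transcribe() signature with an additional batch_size argument.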