├── .github
│   └── workflows
│       └── ci.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── benchmark
│   ├── benchmark.m4a
│   ├── evaluate_yt_commons.py
│   ├── memory_benchmark.py
│   ├── normalizer.json
│   ├── requirements.benchmark.txt
│   ├── speed_benchmark.py
│   ├── utils.py
│   └── wer_benchmark.py
├── docker
│   ├── Dockerfile
│   ├── infer.py
│   └── jfk.flac
├── faster_whisper
│   ├── __init__.py
│   ├── assets
│   │   ├── __init__.py
│   │   ├── silero_decoder_v5.onnx
│   │   └── silero_encoder_v5.onnx
│   ├── audio.py
│   ├── feature_extractor.py
│   ├── tokenizer.py
│   ├── transcribe.py
│   ├── utils.py
│   ├── vad.py
│   └── version.py
├── requirements.conversion.txt
├── requirements.txt
├── setup.cfg
├── setup.py
└── tests
    ├── conftest.py
    ├── data
    │   ├── hotwords.mp3
    │   ├── jfk.flac
    │   ├── multilingual.mp3
    │   ├── physicsworks.wav
    │   └── stereo_diarization.wav
    ├── test_tokenizer.py
    ├── test_transcribe.py
    └── test_utils.py
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 |   push:
5 |     branches:
6 |       - master
7 |     tags:
8 |       - v*
9 |   pull_request:
10 |     branches:
11 |       - master
12 |
13 | jobs:
14 |   check-code-format:
15 |     runs-on: ubuntu-latest
16 |
17 |     steps:
18 |       - uses: actions/checkout@v4
19 |
20 |       - name: Set up Python 3.9
21 |         uses: actions/setup-python@v5
22 |         with:
23 |           python-version: 3.9
24 |
25 |       - name: Install module
26 |         run: |
27 |           pip install wheel
28 |           pip install -e .[dev]
29 |
30 |       - name: Check code format with Black
31 |         run: |
32 |           black --check .
33 |
34 |       - name: Check imports order with isort
35 |         run: |
36 |           isort --check-only .
37 |
38 |       - name: Check code style with Flake8
39 |         if: ${{ always() }}
40 |         run: |
41 |           flake8 .
42 |
43 |
44 |   run-tests:
45 |     runs-on: ubuntu-latest
46 |
47 |     steps:
48 |       - uses: actions/checkout@v4
49 |
50 |       - name: Set up Python 3.9
51 |         uses: actions/setup-python@v5
52 |         with:
53 |           python-version: 3.9
54 |
55 |       - name: Install module
56 |         run: |
57 |           pip install wheel
58 |           pip install -e .[dev]
59 |
60 |       - name: Run pytest
61 |         run: |
62 |           pytest -v tests/
63 |
64 |
65 |   build-and-push-package:
66 |     runs-on: ubuntu-latest
67 |     needs: [check-code-format, run-tests]
68 |
69 |     steps:
70 |       - uses: actions/checkout@v4
71 |
72 |       - name: Set up Python 3.9
73 |         uses: actions/setup-python@v5
74 |         with:
75 |           python-version: 3.9
76 |
77 |       - name: Install dependencies
78 |         run: |
79 |           pip install wheel
80 |
81 |       - name: Build package
82 |         run: |
83 |           python3 setup.py sdist bdist_wheel
84 |
85 |       - name: Push package on PyPI
86 |         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
87 |         uses: pypa/gh-action-pypi-publish@release/v1
88 |         with:
89 |           user: __token__
90 |           password: ${{ secrets.PYPI_API_TOKEN }}
91 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / Optimized / DLL Files
2 | *.pyc
3 | *.pyo
4 | *.pyd
5 | __pycache__/
6 |
7 | # Distribution / Packaging
8 | venv/
9 |
10 | # Unit Test
11 | .pytest_cache/
12 |
13 | # Ignore IDE, Editor Files
14 | .idea/
15 | .vscode/
16 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to faster-whisper
2 |
3 | Contributions are welcome! Here are some pointers to help you install the library for development and validate your changes before submitting a pull request.
4 |
5 | ## Install the library for development
6 |
7 | We recommend installing the module in editable mode with the `dev` extra requirements:
8 |
9 | ```bash
10 | git clone https://github.com/SYSTRAN/faster-whisper.git
11 | cd faster-whisper/
12 | pip install -e .[dev]
13 | ```
14 |
15 | ## Validate the changes before creating a pull request
16 |
17 | 1. Make sure the existing tests are still passing (and consider adding new tests as well!):
18 |
19 | ```bash
20 | pytest tests/
21 | ```
22 |
23 | 2. Reformat and validate the code with the following tools:
24 |
25 | ```bash
26 | black .
27 | isort .
28 | flake8 .
29 | ```
30 |
31 | These steps are also run automatically in the CI when you open the pull request.
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 SYSTRAN
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include faster_whisper/assets/silero_encoder_v5.onnx
2 | include faster_whisper/assets/silero_decoder_v5.onnx
3 | include requirements.txt
4 | include requirements.conversion.txt
5 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://github.com/SYSTRAN/faster-whisper/actions?query=workflow%3ACI) [](https://badge.fury.io/py/faster-whisper)
2 |
3 | # Faster Whisper transcription with CTranslate2
4 |
5 | **faster-whisper** is a reimplementation of OpenAI's Whisper model using [CTranslate2](https://github.com/OpenNMT/CTranslate2/), which is a fast inference engine for Transformer models.
6 |
7 | This implementation is up to 4 times faster than [openai/whisper](https://github.com/openai/whisper) for the same accuracy while using less memory. The efficiency can be further improved with 8-bit quantization on both CPU and GPU.
8 |
9 | ## Benchmark
10 |
11 | ### Whisper
12 |
13 | For reference, here's the time and memory usage required to transcribe [**13 minutes**](https://www.youtube.com/watch?v=0u7tTptBo9I) of audio using different implementations:
14 |
15 | * [openai/whisper](https://github.com/openai/whisper)@[v20240930](https://github.com/openai/whisper/tree/v20240930)
16 | * [whisper.cpp](https://github.com/ggerganov/whisper.cpp)@[v1.7.2](https://github.com/ggerganov/whisper.cpp/tree/v1.7.2)
17 | * [transformers](https://github.com/huggingface/transformers)@[v4.46.3](https://github.com/huggingface/transformers/tree/v4.46.3)
18 | * [faster-whisper](https://github.com/SYSTRAN/faster-whisper)@[v1.1.0](https://github.com/SYSTRAN/faster-whisper/tree/v1.1.0)
19 |
20 | ### Large-v2 model on GPU
21 |
22 | | Implementation | Precision | Beam size | Time | VRAM Usage |
23 | | --- | --- | --- | --- | --- |
24 | | openai/whisper | fp16 | 5 | 2m23s | 4708MB |
25 | | whisper.cpp (Flash Attention) | fp16 | 5 | 1m05s | 4127MB |
26 | | transformers (SDPA)[^1] | fp16 | 5 | 1m52s | 4960MB |
27 | | faster-whisper | fp16 | 5 | 1m03s | 4525MB |
28 | | faster-whisper (`batch_size=8`) | fp16 | 5 | 17s | 6090MB |
29 | | faster-whisper | int8 | 5 | 59s | 2926MB |
30 | | faster-whisper (`batch_size=8`) | int8 | 5 | 16s | 4500MB |
31 |
32 | ### distil-whisper-large-v3 model on GPU
33 |
34 | | Implementation | Precision | Beam size | Time | YT Commons WER |
35 | | --- | --- | --- | --- | --- |
36 | | transformers (SDPA) (`batch_size=16`) | fp16 | 5 | 46m12s | 14.801 |
37 | | faster-whisper (`batch_size=16`) | fp16 | 5 | 25m50s | 13.527 |
38 |
39 | *GPU benchmarks were executed with CUDA 12.4 on an NVIDIA RTX 3070 Ti 8GB.*
40 | [^1]: transformers OOM for any batch size > 1
41 |
42 | ### Small model on CPU
43 |
44 | | Implementation | Precision | Beam size | Time | RAM Usage |
45 | | --- | --- | --- | --- | --- |
46 | | openai/whisper | fp32 | 5 | 6m58s | 2335MB |
47 | | whisper.cpp | fp32 | 5 | 2m05s | 1049MB |
48 | | whisper.cpp (OpenVINO) | fp32 | 5 | 1m45s | 1642MB |
49 | | faster-whisper | fp32 | 5 | 2m37s | 2257MB |
50 | | faster-whisper (`batch_size=8`) | fp32 | 5 | 1m06s | 4230MB |
51 | | faster-whisper | int8 | 5 | 1m42s | 1477MB |
52 | | faster-whisper (`batch_size=8`) | int8 | 5 | 51s | 3608MB |
53 |
54 | *Executed with 8 threads on an Intel Core i7-12700K.*
55 |
56 |
57 | ## Requirements
58 |
59 | * Python 3.9 or greater
60 |
61 | Unlike openai-whisper, FFmpeg does **not** need to be installed on the system. The audio is decoded with the Python library [PyAV](https://github.com/PyAV-Org/PyAV) which bundles the FFmpeg libraries in its package.
62 |
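Audio decoding is exposed through `decode_audio`, which returns a 16 kHz mono waveform that can be passed to `transcribe` in place of a file path. A minimal sketch (the model size and file name here are placeholders):

```python
from faster_whisper import WhisperModel, decode_audio

# Decode any format supported by FFmpeg (bundled with PyAV) into a 16 kHz mono waveform.
audio = decode_audio("audio.mp3", sampling_rate=16000)

model = WhisperModel("small", device="cpu", compute_type="int8")
segments, info = model.transcribe(audio)  # a file path or file-like object also works
```
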
63 | ### GPU
64 |
65 | GPU execution requires the following NVIDIA libraries to be installed:
66 |
67 | * [cuBLAS for CUDA 12](https://developer.nvidia.com/cublas)
68 | * [cuDNN 9 for CUDA 12](https://developer.nvidia.com/cudnn)
69 |
70 | **Note**: The latest versions of `ctranslate2` only support CUDA 12 and cuDNN 9. For CUDA 11 and cuDNN 8, downgrade to version `3.24.0` of `ctranslate2`; for CUDA 12 and cuDNN 8, downgrade to version `4.4.0` of `ctranslate2` (this can be done with `pip install --force-reinstall ctranslate2==4.4.0` or by pinning the version in a `requirements.txt`).
71 |
72 | There are multiple ways to install the NVIDIA libraries mentioned above. The recommended way is described in the official NVIDIA documentation, but we also suggest other installation methods below.
73 |
74 |
75 | Other installation methods (click to expand)
76 |
77 |
78 | **Note:** For all the methods below, keep in mind the note above regarding CUDA versions. Depending on your setup, you may need to install the _CUDA 11_ versions of the libraries that correspond to the CUDA 12 libraries listed below.
79 |
80 | #### Use Docker
81 |
82 | The libraries (cuBLAS, cuDNN) are installed in the official NVIDIA CUDA Docker image `nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04`.
83 |
84 | #### Install with `pip` (Linux only)
85 |
86 | On Linux these libraries can be installed with `pip`. Note that `LD_LIBRARY_PATH` must be set before launching Python.
87 |
88 | ```bash
89 | pip install nvidia-cublas-cu12 nvidia-cudnn-cu12==9.*
90 |
91 | export LD_LIBRARY_PATH=`python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'`
92 | ```
93 |
94 | #### Download the libraries from Purfview's repository (Windows & Linux)
95 |
96 | Purfview's [whisper-standalone-win](https://github.com/Purfview/whisper-standalone-win) provides the required NVIDIA libraries for Windows & Linux in a [single archive](https://github.com/Purfview/whisper-standalone-win/releases/tag/libs). Decompress the archive and place the libraries in a directory included in the `PATH`.
97 |
98 |
99 |
100 | ## Installation
101 |
102 | The module can be installed from [PyPI](https://pypi.org/project/faster-whisper/):
103 |
104 | ```bash
105 | pip install faster-whisper
106 | ```
107 |
108 |
109 | Other installation methods (click to expand)
110 |
111 | ### Install the master branch
112 |
113 | ```bash
114 | pip install --force-reinstall "faster-whisper @ https://github.com/SYSTRAN/faster-whisper/archive/refs/heads/master.tar.gz"
115 | ```
116 |
117 | ### Install a specific commit
118 |
119 | ```bash
120 | pip install --force-reinstall "faster-whisper @ https://github.com/SYSTRAN/faster-whisper/archive/a4f1cc8f11433e454c3934442b5e1a4ed5e865c3.tar.gz"
121 | ```
122 |
123 |
124 |
125 | ## Usage
126 |
127 | ### Faster-whisper
128 |
129 | ```python
130 | from faster_whisper import WhisperModel
131 |
132 | model_size = "large-v3"
133 |
134 | # Run on GPU with FP16
135 | model = WhisperModel(model_size, device="cuda", compute_type="float16")
136 |
137 | # or run on GPU with INT8
138 | # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
139 | # or run on CPU with INT8
140 | # model = WhisperModel(model_size, device="cpu", compute_type="int8")
141 |
142 | segments, info = model.transcribe("audio.mp3", beam_size=5)
143 |
144 | print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
145 |
146 | for segment in segments:
147 |     print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
148 | ```
149 |
150 | **Warning:** `segments` is a *generator* so the transcription only starts when you iterate over it. The transcription can be run to completion by gathering the segments in a list or a `for` loop:
151 |
152 | ```python
153 | segments, _ = model.transcribe("audio.mp3")
154 | segments = list(segments) # The transcription will actually run here.
155 | ```
156 |
157 | ### Batched Transcription
158 | The following code snippet illustrates how to run batched transcription on an example audio file. `BatchedInferencePipeline.transcribe` is a drop-in replacement for `WhisperModel.transcribe`.
159 |
160 | ```python
161 | from faster_whisper import WhisperModel, BatchedInferencePipeline
162 |
163 | model = WhisperModel("turbo", device="cuda", compute_type="float16")
164 | batched_model = BatchedInferencePipeline(model=model)
165 | segments, info = batched_model.transcribe("audio.mp3", batch_size=16)
166 |
167 | for segment in segments:
168 |     print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
169 | ```
170 |
171 | ### Faster Distil-Whisper
172 |
173 | The Distil-Whisper checkpoints are compatible with the Faster-Whisper package. In particular, the latest [distil-large-v3](https://huggingface.co/distil-whisper/distil-large-v3)
174 | checkpoint is intrinsically designed to work with the Faster-Whisper transcription algorithm. The following code snippet
175 | demonstrates how to run inference with distil-large-v3 on a specified audio file:
176 |
177 | ```python
178 | from faster_whisper import WhisperModel
179 |
180 | model_size = "distil-large-v3"
181 |
182 | model = WhisperModel(model_size, device="cuda", compute_type="float16")
183 | segments, info = model.transcribe("audio.mp3", beam_size=5, language="en", condition_on_previous_text=False)
184 |
185 | for segment in segments:
186 |     print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
187 | ```
188 |
189 | For more information about the distil-large-v3 model, refer to the original [model card](https://huggingface.co/distil-whisper/distil-large-v3).
190 |
191 | ### Word-level timestamps
192 |
193 | ```python
194 | segments, _ = model.transcribe("audio.mp3", word_timestamps=True)
195 |
196 | for segment in segments:
197 |     for word in segment.words:
198 |         print("[%.2fs -> %.2fs] %s" % (word.start, word.end, word.word))
199 | ```
200 |
201 | ### VAD filter
202 |
203 | The library integrates the [Silero VAD](https://github.com/snakers4/silero-vad) model to filter out parts of the audio without speech:
204 |
205 | ```python
206 | segments, _ = model.transcribe("audio.mp3", vad_filter=True)
207 | ```
208 |
209 | The default behavior is conservative and only removes silence longer than 2 seconds. See the available VAD parameters and default values in the [source code](https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/vad.py). They can be customized with the dictionary argument `vad_parameters`:
210 |
211 | ```python
212 | segments, _ = model.transcribe(
213 | "audio.mp3",
214 | vad_filter=True,
215 | vad_parameters=dict(min_silence_duration_ms=500),
216 | )
217 | ```
218 | The VAD filter is enabled by default for batched transcription.
219 |
220 | ### Logging
221 |
222 | The library logging level can be configured like this:
223 |
224 | ```python
225 | import logging
226 |
227 | logging.basicConfig()
228 | logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
229 | ```
230 |
231 | ### Going further
232 |
233 | See more model and transcription options in the [`WhisperModel`](https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/transcribe.py) class implementation.
234 |
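For instance, a few commonly used options look like this (a sketch; the full list of parameters and their defaults lives in `transcribe.py`):

```python
segments, info = model.transcribe(
    "audio.mp3",
    language="en",                     # skip automatic language detection
    task="transcribe",                 # or "translate"
    beam_size=5,
    temperature=0.0,
    condition_on_previous_text=False,
    initial_prompt="Glossary: CTranslate2, Whisper.",
    word_timestamps=True,
)
```
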
235 | ## Community integrations
236 |
237 | Here is a non-exhaustive list of open-source projects using faster-whisper. Feel free to add your project to the list!
238 |
239 |
240 | * [speaches](https://github.com/speaches-ai/speaches) is an OpenAI-compatible server using `faster-whisper`. It's easily deployable with Docker, works with OpenAI SDKs/CLI, and supports streaming and live transcription.
241 | * [WhisperX](https://github.com/m-bain/whisperX) is an award-winning Python library that offers speaker diarization and accurate word-level timestamps using wav2vec2 alignment.
242 | * [whisper-ctranslate2](https://github.com/Softcatala/whisper-ctranslate2) is a command line client based on faster-whisper and compatible with the original client from openai/whisper.
243 | * [whisper-diarize](https://github.com/MahmoudAshraf97/whisper-diarization) is a speaker diarization tool that is based on faster-whisper and NVIDIA NeMo.
244 | * [whisper-standalone-win](https://github.com/Purfview/whisper-standalone-win) offers standalone CLI executables of faster-whisper for Windows, Linux & macOS.
245 | * [asr-sd-pipeline](https://github.com/hedrergudene/asr-sd-pipeline) provides a scalable, modular, end-to-end multi-speaker speech-to-text solution implemented using AzureML pipelines.
246 | * [Open-Lyrics](https://github.com/zh-plus/Open-Lyrics) is a Python library that transcribes voice files using faster-whisper, and translates/polishes the resulting text into `.lrc` files in the desired language using OpenAI-GPT.
247 | * [wscribe](https://github.com/geekodour/wscribe) is a flexible transcript generation tool supporting faster-whisper; it can export word-level transcripts, which can then be edited with [wscribe-editor](https://github.com/geekodour/wscribe-editor).
248 | * [aTrain](https://github.com/BANDAS-Center/aTrain) is a graphical user interface implementation of faster-whisper developed at the BANDAS-Center at the University of Graz for transcription and diarization in Windows ([Windows Store App](https://apps.microsoft.com/detail/atrain/9N15Q44SZNS2)) and Linux.
249 | * [Whisper-Streaming](https://github.com/ufal/whisper_streaming) implements real-time mode for offline Whisper-like speech-to-text models with faster-whisper as the most recommended back-end. It implements a streaming policy with self-adaptive latency based on the actual source complexity, and demonstrates the state of the art.
250 | * [WhisperLive](https://github.com/collabora/WhisperLive) is a nearly-live implementation of OpenAI's Whisper which uses faster-whisper as the backend to transcribe audio in real-time.
251 | * [Faster-Whisper-Transcriber](https://github.com/BBC-Esq/ctranslate2-faster-whisper-transcriber) is a simple but reliable voice transcriber that provides a user-friendly interface.
252 | * [Open-dubbing](https://github.com/softcatala/open-dubbing) is an AI dubbing system that uses machine learning models to automatically translate and synchronize audio dialogue into different languages.
253 |
254 | ## Model conversion
255 |
256 | When loading a model from its size such as `WhisperModel("large-v3")`, the corresponding CTranslate2 model is automatically downloaded from the [Hugging Face Hub](https://huggingface.co/Systran).
257 |
258 | We also provide a script to convert any Whisper model compatible with the Transformers library. This can be one of the original OpenAI models or a user fine-tuned model.
259 |
260 | For example the command below converts the [original "large-v3" Whisper model](https://huggingface.co/openai/whisper-large-v3) and saves the weights in FP16:
261 |
262 | ```bash
263 | pip install transformers[torch]>=4.23
264 |
265 | ct2-transformers-converter --model openai/whisper-large-v3 --output_dir whisper-large-v3-ct2 \
266 | --copy_files tokenizer.json preprocessor_config.json --quantization float16
267 | ```
268 |
269 | * The option `--model` accepts a model name on the Hub or a path to a model directory.
270 | * If the option `--copy_files tokenizer.json` is not used, the tokenizer configuration is automatically downloaded when the model is loaded later.
271 |
272 | Models can also be converted from the code. See the [conversion API](https://opennmt.net/CTranslate2/python/ctranslate2.converters.TransformersConverter.html).
273 |
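For example, a minimal sketch mirroring the CLI command above with the CTranslate2 Python API:

```python
import ctranslate2

# Convert the Hugging Face checkpoint to a CTranslate2 model directory in FP16.
converter = ctranslate2.converters.TransformersConverter(
    "openai/whisper-large-v3",
    copy_files=["tokenizer.json", "preprocessor_config.json"],
)
converter.convert("whisper-large-v3-ct2", quantization="float16")
```
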
274 | ### Load a converted model
275 |
276 | 1. Directly load the model from a local directory:
277 | ```python
278 | model = faster_whisper.WhisperModel("whisper-large-v3-ct2")
279 | ```
280 |
281 | 2. [Upload your model to the Hugging Face Hub](https://huggingface.co/docs/transformers/model_sharing#upload-with-the-web-interface) and load it from its name:
282 | ```python
283 | model = faster_whisper.WhisperModel("username/whisper-large-v3-ct2")
284 | ```
285 |
286 | ## Comparing performance against other implementations
287 |
288 | If you are comparing the performance against other Whisper implementations, you should make sure to run the comparison with similar settings. In particular:
289 |
290 | * Verify that the same transcription options are used, especially the same beam size. For example in openai/whisper, `model.transcribe` uses a default beam size of 1 but here we use a default beam size of 5.
291 | * Transcription speed is closely affected by the number of words in the transcript, so ensure that other implementations have a similar WER (Word Error Rate) to this one.
292 | * When running on CPU, make sure to set the same number of threads. Many frameworks will read the environment variable `OMP_NUM_THREADS`, which can be set when running your script:
293 |
294 | ```bash
295 | OMP_NUM_THREADS=4 python3 my_script.py
296 | ```
297 |
--------------------------------------------------------------------------------
/benchmark/benchmark.m4a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/benchmark/benchmark.m4a
--------------------------------------------------------------------------------
/benchmark/evaluate_yt_commons.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 |
5 | from io import BytesIO
6 |
7 | from datasets import load_dataset
8 | from jiwer import wer
9 | from pytubefix import YouTube
10 | from pytubefix.exceptions import VideoUnavailable
11 | from tqdm import tqdm
12 | from transformers.models.whisper.english_normalizer import EnglishTextNormalizer
13 |
14 | from faster_whisper import BatchedInferencePipeline, WhisperModel, decode_audio
15 |
16 |
17 | def url_to_audio(row):
18 |     buffer = BytesIO()
19 |     yt = YouTube(row["link"])
20 |     try:
21 |         video = (
22 |             yt.streams.filter(only_audio=True, mime_type="audio/mp4")
23 |             .order_by("bitrate")
24 |             .desc()
25 |             .last()
26 |         )
27 |         video.stream_to_buffer(buffer)
28 |         buffer.seek(0)
29 |         row["audio"] = decode_audio(buffer)
30 |     except VideoUnavailable:
31 |         print(f'Failed to download: {row["link"]}')
32 |         row["audio"] = []
33 |     return row
34 |
35 |
36 | parser = argparse.ArgumentParser(description="WER benchmark")
37 | parser.add_argument(
38 |     "--audio_numb",
39 |     type=int,
40 |     default=None,
41 |     help="Specify the number of validation audio files in the dataset."
42 |     " Set to None to retrieve all audio files.",
43 | )
44 | args = parser.parse_args()
45 |
46 | with open(os.path.join(os.path.dirname(__file__), "normalizer.json"), "r") as f:
47 |     normalizer = EnglishTextNormalizer(json.load(f))
48 |
49 | dataset = load_dataset("mobiuslabsgmbh/youtube-commons-asr-eval", streaming=True).map(
50 |     url_to_audio
51 | )
52 | model = WhisperModel("large-v3", device="cuda")
53 | pipeline = BatchedInferencePipeline(model, device="cuda")
54 |
55 |
56 | all_transcriptions = []
57 | all_references = []
58 | # iterate over the dataset and run inference
59 | for i, row in tqdm(enumerate(dataset["test"]), desc="Evaluating..."):
60 |     if not row["audio"]:
61 |         continue
62 |     result, info = pipeline.transcribe(
63 |         row["audio"][0],
64 |         batch_size=8,
65 |         word_timestamps=False,
66 |         without_timestamps=True,
67 |     )
68 |
69 |     all_transcriptions.append("".join(segment.text for segment in result))
70 |     all_references.append(row["text"][0])
71 |     if args.audio_numb and i == (args.audio_numb - 1):
72 |         break
73 |
74 | # normalize predictions and references
75 | all_transcriptions = [normalizer(transcription) for transcription in all_transcriptions]
76 | all_references = [normalizer(reference) for reference in all_references]
77 |
78 | # compute the WER metric
79 | word_error_rate = 100 * wer(hypothesis=all_transcriptions, reference=all_references)
80 | print("WER: %.3f" % word_error_rate)
81 |
--------------------------------------------------------------------------------
/benchmark/memory_benchmark.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 |
4 | from typing import Callable
5 |
6 | import py3nvml.py3nvml as nvml
7 |
8 | from memory_profiler import memory_usage
9 | from utils import MyThread, get_logger, inference
10 |
11 | logger = get_logger("faster-whisper")
12 | parser = argparse.ArgumentParser(description="Memory benchmark")
13 | parser.add_argument(
14 |     "--gpu_memory", action="store_true", help="Measure GPU memory usage"
15 | )
16 | parser.add_argument("--device-index", type=int, default=0, help="GPU device index")
17 | parser.add_argument(
18 |     "--interval",
19 |     type=float,
20 |     default=0.5,
21 |     help="Interval at which measurements are collected",
22 | )
23 | args = parser.parse_args()
24 | device_idx = args.device_index
25 | interval = args.interval
26 |
27 |
28 | def measure_memory(func: Callable[[], None]):
29 |     if args.gpu_memory:
30 |         logger.info(
31 |             "Measuring maximum GPU memory usage on GPU device."
32 |             " Make sure to not have additional processes running on the same GPU."
33 |         )
34 |         # init nvml
35 |         nvml.nvmlInit()
36 |         handle = nvml.nvmlDeviceGetHandleByIndex(device_idx)
37 |         gpu_name = nvml.nvmlDeviceGetName(handle)
38 |         gpu_memory_limit = nvml.nvmlDeviceGetMemoryInfo(handle).total >> 20
39 |         gpu_power_limit = nvml.nvmlDeviceGetPowerManagementLimit(handle) / 1000.0
40 |         info = {"gpu_memory_usage": [], "gpu_power_usage": []}
41 |
42 |         def _get_gpu_info():
43 |             while True:
44 |                 info["gpu_memory_usage"].append(
45 |                     nvml.nvmlDeviceGetMemoryInfo(handle).used >> 20
46 |                 )
47 |                 info["gpu_power_usage"].append(
48 |                     nvml.nvmlDeviceGetPowerUsage(handle) / 1000
49 |                 )
50 |                 time.sleep(interval)
51 |
52 |                 if stop:
53 |                     break
54 |
55 |             return info
56 |
57 |         stop = False
58 |         thread = MyThread(_get_gpu_info, params=())
59 |         thread.start()
60 |         func()
61 |         stop = True
62 |         thread.join()
63 |         result = thread.get_result()
64 |
65 |         # shutdown nvml
66 |         nvml.nvmlShutdown()
67 |         max_memory_usage = max(result["gpu_memory_usage"])
68 |         max_power_usage = max(result["gpu_power_usage"])
69 |         print("GPU name: %s" % gpu_name)
70 |         print("GPU device index: %s" % device_idx)
71 |         print(
72 |             "Maximum GPU memory usage: %dMiB / %dMiB (%.2f%%)"
73 |             % (
74 |                 max_memory_usage,
75 |                 gpu_memory_limit,
76 |                 (max_memory_usage / gpu_memory_limit) * 100,
77 |             )
78 |         )
79 |         print(
80 |             "Maximum GPU power usage: %dW / %dW (%.2f%%)"
81 |             % (
82 |                 max_power_usage,
83 |                 gpu_power_limit,
84 |                 (max_power_usage / gpu_power_limit) * 100,
85 |             )
86 |         )
87 |     else:
88 |         logger.info("Measuring maximum increase of memory usage.")
89 |         max_usage = memory_usage(func, max_usage=True, interval=interval)
90 |         print("Maximum increase of RAM memory usage: %d MiB" % max_usage)
91 |
92 |
93 | if __name__ == "__main__":
94 |     measure_memory(inference)
95 |
--------------------------------------------------------------------------------
/benchmark/normalizer.json:
--------------------------------------------------------------------------------
1 | {
2 | "accessorise": "accessorize",
3 | "accessorised": "accessorized",
4 | "accessorises": "accessorizes",
5 | "accessorising": "accessorizing",
6 | "acclimatisation": "acclimatization",
7 | "acclimatise": "acclimatize",
8 | "acclimatised": "acclimatized",
9 | "acclimatises": "acclimatizes",
10 | "acclimatising": "acclimatizing",
11 | "accoutrements": "accouterments",
12 | "aeon": "eon",
13 | "aeons": "eons",
14 | "aerogramme": "aerogram",
15 | "aerogrammes": "aerograms",
16 | "aeroplane": "airplane",
17 | "aeroplanes": "airplanes",
18 | "aesthete": "esthete",
19 | "aesthetes": "esthetes",
20 | "aesthetic": "esthetic",
21 | "aesthetically": "esthetically",
22 | "aesthetics": "esthetics",
23 | "aetiology": "etiology",
24 | "ageing": "aging",
25 | "aggrandisement": "aggrandizement",
26 | "agonise": "agonize",
27 | "agonised": "agonized",
28 | "agonises": "agonizes",
29 | "agonising": "agonizing",
30 | "agonisingly": "agonizingly",
31 | "almanack": "almanac",
32 | "almanacks": "almanacs",
33 | "aluminium": "aluminum",
34 | "amortisable": "amortizable",
35 | "amortisation": "amortization",
36 | "amortisations": "amortizations",
37 | "amortise": "amortize",
38 | "amortised": "amortized",
39 | "amortises": "amortizes",
40 | "amortising": "amortizing",
41 | "amphitheatre": "amphitheater",
42 | "amphitheatres": "amphitheaters",
43 | "anaemia": "anemia",
44 | "anaemic": "anemic",
45 | "anaesthesia": "anesthesia",
46 | "anaesthetic": "anesthetic",
47 | "anaesthetics": "anesthetics",
48 | "anaesthetise": "anesthetize",
49 | "anaesthetised": "anesthetized",
50 | "anaesthetises": "anesthetizes",
51 | "anaesthetising": "anesthetizing",
52 | "anaesthetist": "anesthetist",
53 | "anaesthetists": "anesthetists",
54 | "anaesthetize": "anesthetize",
55 | "anaesthetized": "anesthetized",
56 | "anaesthetizes": "anesthetizes",
57 | "anaesthetizing": "anesthetizing",
58 | "analogue": "analog",
59 | "analogues": "analogs",
60 | "analyse": "analyze",
61 | "analysed": "analyzed",
62 | "analyses": "analyzes",
63 | "analysing": "analyzing",
64 | "anglicise": "anglicize",
65 | "anglicised": "anglicized",
66 | "anglicises": "anglicizes",
67 | "anglicising": "anglicizing",
68 | "annualised": "annualized",
69 | "antagonise": "antagonize",
70 | "antagonised": "antagonized",
71 | "antagonises": "antagonizes",
72 | "antagonising": "antagonizing",
73 | "apologise": "apologize",
74 | "apologised": "apologized",
75 | "apologises": "apologizes",
76 | "apologising": "apologizing",
77 | "appal": "appall",
78 | "appals": "appalls",
79 | "appetiser": "appetizer",
80 | "appetisers": "appetizers",
81 | "appetising": "appetizing",
82 | "appetisingly": "appetizingly",
83 | "arbour": "arbor",
84 | "arbours": "arbors",
85 | "archaeologically": "archeologically",
86 | "archaeologist": "archeologist",
87 | "archaeologists": "archeologists",
88 | "archaeology": "archeology",
89 | "archeological": "archaeological",
90 | "ardour": "ardor",
91 | "armour": "armor",
92 | "armoured": "armored",
93 | "armourer": "armorer",
94 | "armourers": "armorers",
95 | "armouries": "armories",
96 | "armoury": "armory",
97 | "artefact": "artifact",
98 | "artefacts": "artifacts",
99 | "authorise": "authorize",
100 | "authorised": "authorized",
101 | "authorises": "authorizes",
102 | "authorising": "authorizing",
103 | "axe": "ax",
104 | "backpedalled": "backpedaled",
105 | "backpedalling": "backpedaling",
106 | "bannister": "banister",
107 | "bannisters": "banisters",
108 | "baptise": "baptize",
109 | "baptised": "baptized",
110 | "baptises": "baptizes",
111 | "baptising": "baptizing",
112 | "bastardise": "bastardize",
113 | "bastardised": "bastardized",
114 | "bastardises": "bastardizes",
115 | "bastardising": "bastardizing",
116 | "battleax": "battleaxe",
117 | "baulk": "balk",
118 | "baulked": "balked",
119 | "baulking": "balking",
120 | "baulks": "balks",
121 | "bedevilled": "bedeviled",
122 | "bedevilling": "bedeviling",
123 | "behaviour": "behavior",
124 | "behavioural": "behavioral",
125 | "behaviourism": "behaviorism",
126 | "behaviourist": "behaviorist",
127 | "behaviourists": "behaviorists",
128 | "behaviours": "behaviors",
129 | "behove": "behoove",
130 | "behoved": "behooved",
131 | "behoves": "behooves",
132 | "bejewelled": "bejeweled",
133 | "belabour": "belabor",
134 | "belaboured": "belabored",
135 | "belabouring": "belaboring",
136 | "belabours": "belabors",
137 | "bevelled": "beveled",
138 | "bevvies": "bevies",
139 | "bevvy": "bevy",
140 | "biassed": "biased",
141 | "biassing": "biasing",
142 | "bingeing": "binging",
143 | "bougainvillaea": "bougainvillea",
144 | "bougainvillaeas": "bougainvilleas",
145 | "bowdlerise": "bowdlerize",
146 | "bowdlerised": "bowdlerized",
147 | "bowdlerises": "bowdlerizes",
148 | "bowdlerising": "bowdlerizing",
149 | "breathalyse": "breathalyze",
150 | "breathalysed": "breathalyzed",
151 | "breathalyser": "breathalyzer",
152 | "breathalysers": "breathalyzers",
153 | "breathalyses": "breathalyzes",
154 | "breathalysing": "breathalyzing",
155 | "brutalise": "brutalize",
156 | "brutalised": "brutalized",
157 | "brutalises": "brutalizes",
158 | "brutalising": "brutalizing",
159 | "busses": "buses",
160 | "bussing": "busing",
161 | "caesarean": "cesarean",
162 | "caesareans": "cesareans",
163 | "calibre": "caliber",
164 | "calibres": "calibers",
165 | "calliper": "caliper",
166 | "callipers": "calipers",
167 | "callisthenics": "calisthenics",
168 | "canalise": "canalize",
169 | "canalised": "canalized",
170 | "canalises": "canalizes",
171 | "canalising": "canalizing",
172 | "cancelation": "cancellation",
173 | "cancelations": "cancellations",
174 | "cancelled": "canceled",
175 | "cancelling": "canceling",
176 | "candour": "candor",
177 | "cannibalise": "cannibalize",
178 | "cannibalised": "cannibalized",
179 | "cannibalises": "cannibalizes",
180 | "cannibalising": "cannibalizing",
181 | "canonise": "canonize",
182 | "canonised": "canonized",
183 | "canonises": "canonizes",
184 | "canonising": "canonizing",
185 | "capitalise": "capitalize",
186 | "capitalised": "capitalized",
187 | "capitalises": "capitalizes",
188 | "capitalising": "capitalizing",
189 | "caramelise": "caramelize",
190 | "caramelised": "caramelized",
191 | "caramelises": "caramelizes",
192 | "caramelising": "caramelizing",
193 | "carbonise": "carbonize",
194 | "carbonised": "carbonized",
195 | "carbonises": "carbonizes",
196 | "carbonising": "carbonizing",
197 | "carolled": "caroled",
198 | "carolling": "caroling",
199 | "catalogue": "catalog",
200 | "catalogued": "cataloged",
201 | "catalogues": "catalogs",
202 | "cataloguing": "cataloging",
203 | "catalyse": "catalyze",
204 | "catalysed": "catalyzed",
205 | "catalyses": "catalyzes",
206 | "catalysing": "catalyzing",
207 | "categorise": "categorize",
208 | "categorised": "categorized",
209 | "categorises": "categorizes",
210 | "categorising": "categorizing",
211 | "cauterise": "cauterize",
212 | "cauterised": "cauterized",
213 | "cauterises": "cauterizes",
214 | "cauterising": "cauterizing",
215 | "cavilled": "caviled",
216 | "cavilling": "caviling",
217 | "centigramme": "centigram",
218 | "centigrammes": "centigrams",
219 | "centilitre": "centiliter",
220 | "centilitres": "centiliters",
221 | "centimetre": "centimeter",
222 | "centimetres": "centimeters",
223 | "centralise": "centralize",
224 | "centralised": "centralized",
225 | "centralises": "centralizes",
226 | "centralising": "centralizing",
227 | "centre": "center",
228 | "centred": "centered",
229 | "centrefold": "centerfold",
230 | "centrefolds": "centerfolds",
231 | "centrepiece": "centerpiece",
232 | "centrepieces": "centerpieces",
233 | "centres": "centers",
234 | "channelled": "channeled",
235 | "channelling": "channeling",
236 | "characterise": "characterize",
237 | "characterised": "characterized",
238 | "characterises": "characterizes",
239 | "characterising": "characterizing",
240 | "cheque": "check",
241 | "chequebook": "checkbook",
242 | "chequebooks": "checkbooks",
243 | "chequered": "checkered",
244 | "cheques": "checks",
245 | "chilli": "chili",
246 | "chimaera": "chimera",
247 | "chimaeras": "chimeras",
248 | "chiselled": "chiseled",
249 | "chiselling": "chiseling",
250 | "circularise": "circularize",
251 | "circularised": "circularized",
252 | "circularises": "circularizes",
253 | "circularising": "circularizing",
254 | "civilise": "civilize",
255 | "civilised": "civilized",
256 | "civilises": "civilizes",
257 | "civilising": "civilizing",
258 | "clamour": "clamor",
259 | "clamoured": "clamored",
260 | "clamouring": "clamoring",
261 | "clamours": "clamors",
262 | "clangour": "clangor",
263 | "clarinettist": "clarinetist",
264 | "clarinettists": "clarinetists",
265 | "collectivise": "collectivize",
266 | "collectivised": "collectivized",
267 | "collectivises": "collectivizes",
268 | "collectivising": "collectivizing",
269 | "colonisation": "colonization",
270 | "colonise": "colonize",
271 | "colonised": "colonized",
272 | "coloniser": "colonizer",
273 | "colonisers": "colonizers",
274 | "colonises": "colonizes",
275 | "colonising": "colonizing",
276 | "colour": "color",
277 | "colourant": "colorant",
278 | "colourants": "colorants",
279 | "coloured": "colored",
280 | "coloureds": "coloreds",
281 | "colourful": "colorful",
282 | "colourfully": "colorfully",
283 | "colouring": "coloring",
284 | "colourize": "colorize",
285 | "colourized": "colorized",
286 | "colourizes": "colorizes",
287 | "colourizing": "colorizing",
288 | "colourless": "colorless",
289 | "colours": "colors",
290 | "commercialise": "commercialize",
291 | "commercialised": "commercialized",
292 | "commercialises": "commercializes",
293 | "commercialising": "commercializing",
294 | "compartmentalise": "compartmentalize",
295 | "compartmentalised": "compartmentalized",
296 | "compartmentalises": "compartmentalizes",
297 | "compartmentalising": "compartmentalizing",
298 | "computerise": "computerize",
299 | "computerised": "computerized",
300 | "computerises": "computerizes",
301 | "computerising": "computerizing",
302 | "conceptualise": "conceptualize",
303 | "conceptualised": "conceptualized",
304 | "conceptualises": "conceptualizes",
305 | "conceptualising": "conceptualizing",
306 | "connexion": "connection",
307 | "connexions": "connections",
308 | "contextualise": "contextualize",
309 | "contextualised": "contextualized",
310 | "contextualises": "contextualizes",
311 | "contextualising": "contextualizing",
312 | "cosier": "cozier",
313 | "cosies": "cozies",
314 | "cosiest": "coziest",
315 | "cosily": "cozily",
316 | "cosiness": "coziness",
317 | "cosy": "cozy",
318 | "councillor": "councilor",
319 | "councillors": "councilors",
320 | "counselled": "counseled",
321 | "counselling": "counseling",
322 | "counsellor": "counselor",
323 | "counsellors": "counselors",
324 | "crenelated": "crenellated",
325 | "criminalise": "criminalize",
326 | "criminalised": "criminalized",
327 | "criminalises": "criminalizes",
328 | "criminalising": "criminalizing",
329 | "criticise": "criticize",
330 | "criticised": "criticized",
331 | "criticises": "criticizes",
332 | "criticising": "criticizing",
333 | "crueller": "crueler",
334 | "cruellest": "cruelest",
335 | "crystallisation": "crystallization",
336 | "crystallise": "crystallize",
337 | "crystallised": "crystallized",
338 | "crystallises": "crystallizes",
339 | "crystallising": "crystallizing",
340 | "cudgelled": "cudgeled",
341 | "cudgelling": "cudgeling",
342 | "customise": "customize",
343 | "customised": "customized",
344 | "customises": "customizes",
345 | "customising": "customizing",
346 | "cypher": "cipher",
347 | "cyphers": "ciphers",
348 | "decentralisation": "decentralization",
349 | "decentralise": "decentralize",
350 | "decentralised": "decentralized",
351 | "decentralises": "decentralizes",
352 | "decentralising": "decentralizing",
353 | "decriminalisation": "decriminalization",
354 | "decriminalise": "decriminalize",
355 | "decriminalised": "decriminalized",
356 | "decriminalises": "decriminalizes",
357 | "decriminalising": "decriminalizing",
358 | "defence": "defense",
359 | "defenceless": "defenseless",
360 | "defences": "defenses",
361 | "dehumanisation": "dehumanization",
362 | "dehumanise": "dehumanize",
363 | "dehumanised": "dehumanized",
364 | "dehumanises": "dehumanizes",
365 | "dehumanising": "dehumanizing",
366 | "demeanour": "demeanor",
367 | "demilitarisation": "demilitarization",
368 | "demilitarise": "demilitarize",
369 | "demilitarised": "demilitarized",
370 | "demilitarises": "demilitarizes",
371 | "demilitarising": "demilitarizing",
372 | "demobilisation": "demobilization",
373 | "demobilise": "demobilize",
374 | "demobilised": "demobilized",
375 | "demobilises": "demobilizes",
376 | "demobilising": "demobilizing",
377 | "democratisation": "democratization",
378 | "democratise": "democratize",
379 | "democratised": "democratized",
380 | "democratises": "democratizes",
381 | "democratising": "democratizing",
382 | "demonise": "demonize",
383 | "demonised": "demonized",
384 | "demonises": "demonizes",
385 | "demonising": "demonizing",
386 | "demoralisation": "demoralization",
387 | "demoralise": "demoralize",
388 | "demoralised": "demoralized",
389 | "demoralises": "demoralizes",
390 | "demoralising": "demoralizing",
391 | "denationalisation": "denationalization",
392 | "denationalise": "denationalize",
393 | "denationalised": "denationalized",
394 | "denationalises": "denationalizes",
395 | "denationalising": "denationalizing",
396 | "deodorise": "deodorize",
397 | "deodorised": "deodorized",
398 | "deodorises": "deodorizes",
399 | "deodorising": "deodorizing",
400 | "depersonalise": "depersonalize",
401 | "depersonalised": "depersonalized",
402 | "depersonalises": "depersonalizes",
403 | "depersonalising": "depersonalizing",
404 | "deputise": "deputize",
405 | "deputised": "deputized",
406 | "deputises": "deputizes",
407 | "deputising": "deputizing",
408 | "desensitisation": "desensitization",
409 | "desensitise": "desensitize",
410 | "desensitised": "desensitized",
411 | "desensitises": "desensitizes",
412 | "desensitising": "desensitizing",
413 | "destabilisation": "destabilization",
414 | "destabilise": "destabilize",
415 | "destabilised": "destabilized",
416 | "destabilises": "destabilizes",
417 | "destabilising": "destabilizing",
418 | "dialled": "dialed",
419 | "dialling": "dialing",
420 | "dialogue": "dialog",
421 | "dialogues": "dialogs",
422 | "diarrhoea": "diarrhea",
423 | "digitise": "digitize",
424 | "digitised": "digitized",
425 | "digitises": "digitizes",
426 | "digitising": "digitizing",
427 | "disc": "disk",
428 | "discolour": "discolor",
429 | "discoloured": "discolored",
430 | "discolouring": "discoloring",
431 | "discolours": "discolors",
432 | "discs": "disks",
433 | "disembowelled": "disemboweled",
434 | "disembowelling": "disemboweling",
435 | "disfavour": "disfavor",
436 | "dishevelled": "disheveled",
437 | "dishonour": "dishonor",
438 | "dishonourable": "dishonorable",
439 | "dishonourably": "dishonorably",
440 | "dishonoured": "dishonored",
441 | "dishonouring": "dishonoring",
442 | "dishonours": "dishonors",
443 | "disorganisation": "disorganization",
444 | "disorganised": "disorganized",
445 | "distil": "distill",
446 | "distils": "distills",
447 | "dramatisation": "dramatization",
448 | "dramatisations": "dramatizations",
449 | "dramatise": "dramatize",
450 | "dramatised": "dramatized",
451 | "dramatises": "dramatizes",
452 | "dramatising": "dramatizing",
453 | "draught": "draft",
454 | "draughtboard": "draftboard",
455 | "draughtboards": "draftboards",
456 | "draughtier": "draftier",
457 | "draughtiest": "draftiest",
458 | "draughts": "drafts",
459 | "draughtsman": "draftsman",
460 | "draughtsmanship": "draftsmanship",
461 | "draughtsmen": "draftsmen",
462 | "draughtswoman": "draftswoman",
463 | "draughtswomen": "draftswomen",
464 | "draughty": "drafty",
465 | "drivelled": "driveled",
466 | "drivelling": "driveling",
467 | "duelled": "dueled",
468 | "duelling": "dueling",
469 | "economise": "economize",
470 | "economised": "economized",
471 | "economises": "economizes",
472 | "economising": "economizing",
473 | "editorialise": "editorialize",
474 | "editorialised": "editorialized",
475 | "editorialises": "editorializes",
476 | "editorialising": "editorializing",
477 | "edoema": "edema",
478 | "empathise": "empathize",
479 | "empathised": "empathized",
480 | "empathises": "empathizes",
481 | "empathising": "empathizing",
482 | "emphasise": "emphasize",
483 | "emphasised": "emphasized",
484 | "emphasises": "emphasizes",
485 | "emphasising": "emphasizing",
486 | "enamelled": "enameled",
487 | "enamelling": "enameling",
488 | "enamoured": "enamored",
489 | "encyclopaedia": "encyclopedia",
490 | "encyclopaedias": "encyclopedias",
491 | "encyclopaedic": "encyclopedic",
492 | "endeavour": "endeavor",
493 | "endeavoured": "endeavored",
494 | "endeavouring": "endeavoring",
495 | "endeavours": "endeavors",
496 | "energise": "energize",
497 | "energised": "energized",
498 | "energises": "energizes",
499 | "energising": "energizing",
500 | "enrol": "enroll",
501 | "enrols": "enrolls",
502 | "enthral": "enthrall",
503 | "enthrals": "enthralls",
504 | "epaulette": "epaulet",
505 | "epaulettes": "epaulets",
506 | "epicentre": "epicenter",
507 | "epicentres": "epicenters",
508 | "epilogue": "epilog",
509 | "epilogues": "epilogs",
510 | "epitomise": "epitomize",
511 | "epitomised": "epitomized",
512 | "epitomises": "epitomizes",
513 | "epitomising": "epitomizing",
514 | "equalisation": "equalization",
515 | "equalise": "equalize",
516 | "equalised": "equalized",
517 | "equaliser": "equalizer",
518 | "equalisers": "equalizers",
519 | "equalises": "equalizes",
520 | "equalising": "equalizing",
521 | "eulogise": "eulogize",
522 | "eulogised": "eulogized",
523 | "eulogises": "eulogizes",
524 | "eulogising": "eulogizing",
525 | "evangelise": "evangelize",
526 | "evangelised": "evangelized",
527 | "evangelises": "evangelizes",
528 | "evangelising": "evangelizing",
529 | "exorcise": "exorcize",
530 | "exorcised": "exorcized",
531 | "exorcises": "exorcizes",
532 | "exorcising": "exorcizing",
533 | "extemporisation": "extemporization",
534 | "extemporise": "extemporize",
535 | "extemporised": "extemporized",
536 | "extemporises": "extemporizes",
537 | "extemporising": "extemporizing",
538 | "externalisation": "externalization",
539 | "externalisations": "externalizations",
540 | "externalise": "externalize",
541 | "externalised": "externalized",
542 | "externalises": "externalizes",
543 | "externalising": "externalizing",
544 | "factorise": "factorize",
545 | "factorised": "factorized",
546 | "factorises": "factorizes",
547 | "factorising": "factorizing",
548 | "faecal": "fecal",
549 | "faeces": "feces",
550 | "familiarisation": "familiarization",
551 | "familiarise": "familiarize",
552 | "familiarised": "familiarized",
553 | "familiarises": "familiarizes",
554 | "familiarising": "familiarizing",
555 | "fantasise": "fantasize",
556 | "fantasised": "fantasized",
557 | "fantasises": "fantasizes",
558 | "fantasising": "fantasizing",
559 | "favour": "favor",
560 | "favourable": "favorable",
561 | "favourably": "favorably",
562 | "favoured": "favored",
563 | "favouring": "favoring",
564 | "favourite": "favorite",
565 | "favourites": "favorites",
566 | "favouritism": "favoritism",
567 | "favours": "favors",
568 | "feminise": "feminize",
569 | "feminised": "feminized",
570 | "feminises": "feminizes",
571 | "feminising": "feminizing",
572 | "fertilisation": "fertilization",
573 | "fertilise": "fertilize",
574 | "fertilised": "fertilized",
575 | "fertiliser": "fertilizer",
576 | "fertilisers": "fertilizers",
577 | "fertilises": "fertilizes",
578 | "fertilising": "fertilizing",
579 | "fervour": "fervor",
580 | "fibre": "fiber",
581 | "fibreglass": "fiberglass",
582 | "fibres": "fibers",
583 | "fictionalisation": "fictionalization",
584 | "fictionalisations": "fictionalizations",
585 | "fictionalise": "fictionalize",
586 | "fictionalised": "fictionalized",
587 | "fictionalises": "fictionalizes",
588 | "fictionalising": "fictionalizing",
589 | "fillet": "filet",
590 | "filleted": "fileted",
591 | "filleting": "fileting",
592 | "fillets": "filets",
593 | "finalisation": "finalization",
594 | "finalise": "finalize",
595 | "finalised": "finalized",
596 | "finalises": "finalizes",
597 | "finalising": "finalizing",
598 | "flautist": "flutist",
599 | "flautists": "flutists",
600 | "flavour": "flavor",
601 | "flavoured": "flavored",
602 | "flavouring": "flavoring",
603 | "flavourings": "flavorings",
604 | "flavourless": "flavorless",
605 | "flavours": "flavors",
606 | "flavoursome": "flavorsome",
607 | "flyer / flier": "flier / flyer",
608 | "foetal": "fetal",
609 | "foetid": "fetid",
610 | "foetus": "fetus",
611 | "foetuses": "fetuses",
612 | "formalisation": "formalization",
613 | "formalise": "formalize",
614 | "formalised": "formalized",
615 | "formalises": "formalizes",
616 | "formalising": "formalizing",
617 | "fossilisation": "fossilization",
618 | "fossilise": "fossilize",
619 | "fossilised": "fossilized",
620 | "fossilises": "fossilizes",
621 | "fossilising": "fossilizing",
622 | "fraternisation": "fraternization",
623 | "fraternise": "fraternize",
624 | "fraternised": "fraternized",
625 | "fraternises": "fraternizes",
626 | "fraternising": "fraternizing",
627 | "fulfil": "fulfill",
628 | "fulfilment": "fulfillment",
629 | "fulfils": "fulfills",
630 | "funnelled": "funneled",
631 | "funnelling": "funneling",
632 | "gage": "gauge",
633 | "gaged": "gauged",
634 | "gages": "gauges",
635 | "gaging": "gauging",
636 | "galvanise": "galvanize",
637 | "galvanised": "galvanized",
638 | "galvanises": "galvanizes",
639 | "galvanising": "galvanizing",
640 | "gambolled": "gamboled",
641 | "gambolling": "gamboling",
642 | "gaol": "jail",
643 | "gaolbird": "jailbird",
644 | "gaolbirds": "jailbirds",
645 | "gaolbreak": "jailbreak",
646 | "gaolbreaks": "jailbreaks",
647 | "gaoled": "jailed",
648 | "gaoler": "jailer",
649 | "gaolers": "jailers",
650 | "gaoling": "jailing",
651 | "gaols": "jails",
652 | "gasses": "gases",
653 | "generalisation": "generalization",
654 | "generalisations": "generalizations",
655 | "generalise": "generalize",
656 | "generalised": "generalized",
657 | "generalises": "generalizes",
658 | "generalising": "generalizing",
659 | "ghettoise": "ghettoize",
660 | "ghettoised": "ghettoized",
661 | "ghettoises": "ghettoizes",
662 | "ghettoising": "ghettoizing",
663 | "gipsies": "gypsies",
664 | "glamor": "glamour",
665 | "glamorise": "glamorize",
666 | "glamorised": "glamorized",
667 | "glamorises": "glamorizes",
668 | "glamorising": "glamorizing",
669 | "globalisation": "globalization",
670 | "globalise": "globalize",
671 | "globalised": "globalized",
672 | "globalises": "globalizes",
673 | "globalising": "globalizing",
674 | "glueing": "gluing",
675 | "goitre": "goiter",
676 | "goitres": "goiters",
677 | "gonorrhoea": "gonorrhea",
678 | "gramme": "gram",
679 | "grammes": "grams",
680 | "gravelled": "graveled",
681 | "grey": "gray",
682 | "greyed": "grayed",
683 | "greying": "graying",
684 | "greyish": "grayish",
685 | "greyness": "grayness",
686 | "greys": "grays",
687 | "grovelled": "groveled",
688 | "grovelling": "groveling",
689 | "groyne": "groin",
690 | "groynes": "groins",
691 | "gruelling": "grueling",
692 | "gruellingly": "gruelingly",
693 | "gryphon": "griffin",
694 | "gryphons": "griffins",
695 | "gynaecological": "gynecological",
696 | "gynaecologist": "gynecologist",
697 | "gynaecologists": "gynecologists",
698 | "gynaecology": "gynecology",
699 | "haematological": "hematological",
700 | "haematologist": "hematologist",
701 | "haematologists": "hematologists",
702 | "haematology": "hematology",
703 | "haemoglobin": "hemoglobin",
704 | "haemophilia": "hemophilia",
705 | "haemophiliac": "hemophiliac",
706 | "haemophiliacs": "hemophiliacs",
707 | "haemorrhage": "hemorrhage",
708 | "haemorrhaged": "hemorrhaged",
709 | "haemorrhages": "hemorrhages",
710 | "haemorrhaging": "hemorrhaging",
711 | "haemorrhoids": "hemorrhoids",
712 | "harbour": "harbor",
713 | "harboured": "harbored",
714 | "harbouring": "harboring",
715 | "harbours": "harbors",
716 | "harmonisation": "harmonization",
717 | "harmonise": "harmonize",
718 | "harmonised": "harmonized",
719 | "harmonises": "harmonizes",
720 | "harmonising": "harmonizing",
721 | "homoeopath": "homeopath",
722 | "homoeopathic": "homeopathic",
723 | "homoeopaths": "homeopaths",
724 | "homoeopathy": "homeopathy",
725 | "homogenise": "homogenize",
726 | "homogenised": "homogenized",
727 | "homogenises": "homogenizes",
728 | "homogenising": "homogenizing",
729 | "honour": "honor",
730 | "honourable": "honorable",
731 | "honourably": "honorably",
732 | "honoured": "honored",
733 | "honouring": "honoring",
734 | "honours": "honors",
735 | "hospitalisation": "hospitalization",
736 | "hospitalise": "hospitalize",
737 | "hospitalised": "hospitalized",
738 | "hospitalises": "hospitalizes",
739 | "hospitalising": "hospitalizing",
740 | "humanise": "humanize",
741 | "humanised": "humanized",
742 | "humanises": "humanizes",
743 | "humanising": "humanizing",
744 | "humour": "humor",
745 | "humoured": "humored",
746 | "humouring": "humoring",
747 | "humourless": "humorless",
748 | "humours": "humors",
749 | "hybridise": "hybridize",
750 | "hybridised": "hybridized",
751 | "hybridises": "hybridizes",
752 | "hybridising": "hybridizing",
753 | "hypnotise": "hypnotize",
754 | "hypnotised": "hypnotized",
755 | "hypnotises": "hypnotizes",
756 | "hypnotising": "hypnotizing",
757 | "hypothesise": "hypothesize",
758 | "hypothesised": "hypothesized",
759 | "hypothesises": "hypothesizes",
760 | "hypothesising": "hypothesizing",
761 | "idealisation": "idealization",
762 | "idealise": "idealize",
763 | "idealised": "idealized",
764 | "idealises": "idealizes",
765 | "idealising": "idealizing",
766 | "idolise": "idolize",
767 | "idolised": "idolized",
768 | "idolises": "idolizes",
769 | "idolising": "idolizing",
770 | "immobilisation": "immobilization",
771 | "immobilise": "immobilize",
772 | "immobilised": "immobilized",
773 | "immobiliser": "immobilizer",
774 | "immobilisers": "immobilizers",
775 | "immobilises": "immobilizes",
776 | "immobilising": "immobilizing",
777 | "immortalise": "immortalize",
778 | "immortalised": "immortalized",
779 | "immortalises": "immortalizes",
780 | "immortalising": "immortalizing",
781 | "immunisation": "immunization",
782 | "immunise": "immunize",
783 | "immunised": "immunized",
784 | "immunises": "immunizes",
785 | "immunising": "immunizing",
786 | "impanelled": "impaneled",
787 | "impanelling": "impaneling",
788 | "imperilled": "imperiled",
789 | "imperilling": "imperiling",
790 | "individualise": "individualize",
791 | "individualised": "individualized",
792 | "individualises": "individualizes",
793 | "individualising": "individualizing",
794 | "industrialise": "industrialize",
795 | "industrialised": "industrialized",
796 | "industrialises": "industrializes",
797 | "industrialising": "industrializing",
798 | "inflexion": "inflection",
799 | "inflexions": "inflections",
800 | "initialise": "initialize",
801 | "initialised": "initialized",
802 | "initialises": "initializes",
803 | "initialising": "initializing",
804 | "initialled": "initialed",
805 | "initialling": "initialing",
806 | "instal": "install",
807 | "instalment": "installment",
808 | "instalments": "installments",
809 | "instals": "installs",
810 | "instil": "instill",
811 | "instils": "instills",
812 | "institutionalisation": "institutionalization",
813 | "institutionalise": "institutionalize",
814 | "institutionalised": "institutionalized",
815 | "institutionalises": "institutionalizes",
816 | "institutionalising": "institutionalizing",
817 | "intellectualise": "intellectualize",
818 | "intellectualised": "intellectualized",
819 | "intellectualises": "intellectualizes",
820 | "intellectualising": "intellectualizing",
821 | "internalisation": "internalization",
822 | "internalise": "internalize",
823 | "internalised": "internalized",
824 | "internalises": "internalizes",
825 | "internalising": "internalizing",
826 | "internationalisation": "internationalization",
827 | "internationalise": "internationalize",
828 | "internationalised": "internationalized",
829 | "internationalises": "internationalizes",
830 | "internationalising": "internationalizing",
831 | "ionisation": "ionization",
832 | "ionise": "ionize",
833 | "ionised": "ionized",
834 | "ioniser": "ionizer",
835 | "ionisers": "ionizers",
836 | "ionises": "ionizes",
837 | "ionising": "ionizing",
838 | "italicise": "italicize",
839 | "italicised": "italicized",
840 | "italicises": "italicizes",
841 | "italicising": "italicizing",
842 | "itemise": "itemize",
843 | "itemised": "itemized",
844 | "itemises": "itemizes",
845 | "itemising": "itemizing",
846 | "jeopardise": "jeopardize",
847 | "jeopardised": "jeopardized",
848 | "jeopardises": "jeopardizes",
849 | "jeopardising": "jeopardizing",
850 | "jewelled": "jeweled",
851 | "jeweller": "jeweler",
852 | "jewellers": "jewelers",
853 | "jewellery": "jewelry",
854 | "judgement": "judgment",
855 | "kilogramme": "kilogram",
856 | "kilogrammes": "kilograms",
857 | "kilometre": "kilometer",
858 | "kilometres": "kilometers",
859 | "labelled": "labeled",
860 | "labelling": "labeling",
861 | "labour": "labor",
862 | "laboured": "labored",
863 | "labourer": "laborer",
864 | "labourers": "laborers",
865 | "labouring": "laboring",
866 | "labours": "labors",
867 | "lacklustre": "lackluster",
868 | "legalisation": "legalization",
869 | "legalise": "legalize",
870 | "legalised": "legalized",
871 | "legalises": "legalizes",
872 | "legalising": "legalizing",
873 | "legitimise": "legitimize",
874 | "legitimised": "legitimized",
875 | "legitimises": "legitimizes",
876 | "legitimising": "legitimizing",
877 | "leukaemia": "leukemia",
878 | "levelled": "leveled",
879 | "leveller": "leveler",
880 | "levellers": "levelers",
881 | "levelling": "leveling",
882 | "libelled": "libeled",
883 | "libelling": "libeling",
884 | "libellous": "libelous",
885 | "liberalisation": "liberalization",
886 | "liberalise": "liberalize",
887 | "liberalised": "liberalized",
888 | "liberalises": "liberalizes",
889 | "liberalising": "liberalizing",
890 | "licence": "license",
891 | "licenced": "licensed",
892 | "licences": "licenses",
893 | "licencing": "licensing",
894 | "likeable": "likable",
895 | "lionisation": "lionization",
896 | "lionise": "lionize",
897 | "lionised": "lionized",
898 | "lionises": "lionizes",
899 | "lionising": "lionizing",
900 | "liquidise": "liquidize",
901 | "liquidised": "liquidized",
902 | "liquidiser": "liquidizer",
903 | "liquidisers": "liquidizers",
904 | "liquidises": "liquidizes",
905 | "liquidising": "liquidizing",
906 | "litre": "liter",
907 | "litres": "liters",
908 | "localise": "localize",
909 | "localised": "localized",
910 | "localises": "localizes",
911 | "localising": "localizing",
912 | "louvre": "louver",
913 | "louvred": "louvered",
914 | "louvres": "louvers",
915 | "lustre": "luster",
916 | "magnetise": "magnetize",
917 | "magnetised": "magnetized",
918 | "magnetises": "magnetizes",
919 | "magnetising": "magnetizing",
920 | "manoeuvrability": "maneuverability",
921 | "manoeuvrable": "maneuverable",
922 | "manoeuvre": "maneuver",
923 | "manoeuvred": "maneuvered",
924 | "manoeuvres": "maneuvers",
925 | "manoeuvring": "maneuvering",
926 | "manoeuvrings": "maneuverings",
927 | "marginalisation": "marginalization",
928 | "marginalise": "marginalize",
929 | "marginalised": "marginalized",
930 | "marginalises": "marginalizes",
931 | "marginalising": "marginalizing",
932 | "marshalled": "marshaled",
933 | "marshalling": "marshaling",
934 | "marvelled": "marveled",
935 | "marvelling": "marveling",
936 | "marvellous": "marvelous",
937 | "marvellously": "marvelously",
938 | "materialisation": "materialization",
939 | "materialise": "materialize",
940 | "materialised": "materialized",
941 | "materialises": "materializes",
942 | "materialising": "materializing",
943 | "maximisation": "maximization",
944 | "maximise": "maximize",
945 | "maximised": "maximized",
946 | "maximises": "maximizes",
947 | "maximising": "maximizing",
948 | "meagre": "meager",
949 | "mechanisation": "mechanization",
950 | "mechanise": "mechanize",
951 | "mechanised": "mechanized",
952 | "mechanises": "mechanizes",
953 | "mechanising": "mechanizing",
954 | "mediaeval": "medieval",
955 | "memorialise": "memorialize",
956 | "memorialised": "memorialized",
957 | "memorialises": "memorializes",
958 | "memorialising": "memorializing",
959 | "memorise": "memorize",
960 | "memorised": "memorized",
961 | "memorises": "memorizes",
962 | "memorising": "memorizing",
963 | "mesmerise": "mesmerize",
964 | "mesmerised": "mesmerized",
965 | "mesmerises": "mesmerizes",
966 | "mesmerising": "mesmerizing",
967 | "metabolise": "metabolize",
968 | "metabolised": "metabolized",
969 | "metabolises": "metabolizes",
970 | "metabolising": "metabolizing",
971 | "metre": "meter",
972 | "metres": "meters",
973 | "mhm": "hmm",
974 | "micrometre": "micrometer",
975 | "micrometres": "micrometers",
976 | "militarise": "militarize",
977 | "militarised": "militarized",
978 | "militarises": "militarizes",
979 | "militarising": "militarizing",
980 | "milligramme": "milligram",
981 | "milligrammes": "milligrams",
982 | "millilitre": "milliliter",
983 | "millilitres": "milliliters",
984 | "millimetre": "millimeter",
985 | "millimetres": "millimeters",
986 | "miniaturisation": "miniaturization",
987 | "miniaturise": "miniaturize",
988 | "miniaturised": "miniaturized",
989 | "miniaturises": "miniaturizes",
990 | "miniaturising": "miniaturizing",
991 | "minibusses": "minibuses",
992 | "minimise": "minimize",
993 | "minimised": "minimized",
994 | "minimises": "minimizes",
995 | "minimising": "minimizing",
996 | "misbehaviour": "misbehavior",
997 | "misdemeanour": "misdemeanor",
998 | "misdemeanours": "misdemeanors",
999 | "misspelt": "misspelled",
1000 | "mitre": "miter",
1001 | "mitres": "miters",
1002 | "mm": "hmm",
1003 | "mmm": "hmm",
1004 | "mobilisation": "mobilization",
1005 | "mobilise": "mobilize",
1006 | "mobilised": "mobilized",
1007 | "mobilises": "mobilizes",
1008 | "mobilising": "mobilizing",
1009 | "modelled": "modeled",
1010 | "modeller": "modeler",
1011 | "modellers": "modelers",
1012 | "modelling": "modeling",
1013 | "modernise": "modernize",
1014 | "modernised": "modernized",
1015 | "modernises": "modernizes",
1016 | "modernising": "modernizing",
1017 | "moisturise": "moisturize",
1018 | "moisturised": "moisturized",
1019 | "moisturiser": "moisturizer",
1020 | "moisturisers": "moisturizers",
1021 | "moisturises": "moisturizes",
1022 | "moisturising": "moisturizing",
1023 | "monologue": "monolog",
1024 | "monologues": "monologs",
1025 | "monopolisation": "monopolization",
1026 | "monopolise": "monopolize",
1027 | "monopolised": "monopolized",
1028 | "monopolises": "monopolizes",
1029 | "monopolising": "monopolizing",
1030 | "moralise": "moralize",
1031 | "moralised": "moralized",
1032 | "moralises": "moralizes",
1033 | "moralising": "moralizing",
1034 | "motorised": "motorized",
1035 | "mould": "mold",
1036 | "moulded": "molded",
1037 | "moulder": "molder",
1038 | "mouldered": "moldered",
1039 | "mouldering": "moldering",
1040 | "moulders": "molders",
1041 | "mouldier": "moldier",
1042 | "mouldiest": "moldiest",
1043 | "moulding": "molding",
1044 | "mouldings": "moldings",
1045 | "moulds": "molds",
1046 | "mouldy": "moldy",
1047 | "moult": "molt",
1048 | "moulted": "molted",
1049 | "moulting": "molting",
1050 | "moults": "molts",
1051 | "moustache": "mustache",
1052 | "moustached": "mustached",
1053 | "moustaches": "mustaches",
1054 | "moustachioed": "mustachioed",
1055 | "multicoloured": "multicolored",
1056 | "nationalisation": "nationalization",
1057 | "nationalisations": "nationalizations",
1058 | "nationalise": "nationalize",
1059 | "nationalised": "nationalized",
1060 | "nationalises": "nationalizes",
1061 | "nationalising": "nationalizing",
1062 | "naturalisation": "naturalization",
1063 | "naturalise": "naturalize",
1064 | "naturalised": "naturalized",
1065 | "naturalises": "naturalizes",
1066 | "naturalising": "naturalizing",
1067 | "neighbour": "neighbor",
1068 | "neighbourhood": "neighborhood",
1069 | "neighbourhoods": "neighborhoods",
1070 | "neighbouring": "neighboring",
1071 | "neighbourliness": "neighborliness",
1072 | "neighbourly": "neighborly",
1073 | "neighbours": "neighbors",
1074 | "neutralisation": "neutralization",
1075 | "neutralise": "neutralize",
1076 | "neutralised": "neutralized",
1077 | "neutralises": "neutralizes",
1078 | "neutralising": "neutralizing",
1079 | "normalisation": "normalization",
1080 | "normalise": "normalize",
1081 | "normalised": "normalized",
1082 | "normalises": "normalizes",
1083 | "normalising": "normalizing",
1084 | "odour": "odor",
1085 | "odourless": "odorless",
1086 | "odours": "odors",
1087 | "oesophagus": "esophagus",
1088 | "oesophaguses": "esophaguses",
1089 | "oestrogen": "estrogen",
1090 | "offence": "offense",
1091 | "offences": "offenses",
1092 | "omelette": "omelet",
1093 | "omelettes": "omelets",
1094 | "optimise": "optimize",
1095 | "optimised": "optimized",
1096 | "optimises": "optimizes",
1097 | "optimising": "optimizing",
1098 | "organisation": "organization",
1099 | "organisational": "organizational",
1100 | "organisations": "organizations",
1101 | "organise": "organize",
1102 | "organised": "organized",
1103 | "organiser": "organizer",
1104 | "organisers": "organizers",
1105 | "organises": "organizes",
1106 | "organising": "organizing",
1107 | "orthopaedic": "orthopedic",
1108 | "orthopaedics": "orthopedics",
1109 | "ostracise": "ostracize",
1110 | "ostracised": "ostracized",
1111 | "ostracises": "ostracizes",
1112 | "ostracising": "ostracizing",
1113 | "outmanoeuvre": "outmaneuver",
1114 | "outmanoeuvred": "outmaneuvered",
1115 | "outmanoeuvres": "outmaneuvers",
1116 | "outmanoeuvring": "outmaneuvering",
1117 | "overemphasise": "overemphasize",
1118 | "overemphasised": "overemphasized",
1119 | "overemphasises": "overemphasizes",
1120 | "overemphasising": "overemphasizing",
1121 | "oxidisation": "oxidization",
1122 | "oxidise": "oxidize",
1123 | "oxidised": "oxidized",
1124 | "oxidises": "oxidizes",
1125 | "oxidising": "oxidizing",
1126 | "paederast": "pederast",
1127 | "paederasts": "pederasts",
1128 | "paediatric": "pediatric",
1129 | "paediatrician": "pediatrician",
1130 | "paediatricians": "pediatricians",
1131 | "paediatrics": "pediatrics",
1132 | "paedophile": "pedophile",
1133 | "paedophiles": "pedophiles",
1134 | "paedophilia": "pedophilia",
1135 | "palaeolithic": "paleolithic",
1136 | "palaeontologist": "paleontologist",
1137 | "palaeontologists": "paleontologists",
1138 | "palaeontology": "paleontology",
1139 | "panelled": "paneled",
1140 | "panelling": "paneling",
1141 | "panellist": "panelist",
1142 | "panellists": "panelists",
1143 | "paralyse": "paralyze",
1144 | "paralysed": "paralyzed",
1145 | "paralyses": "paralyzes",
1146 | "paralysing": "paralyzing",
1147 | "parcelled": "parceled",
1148 | "parcelling": "parceling",
1149 | "parlour": "parlor",
1150 | "parlours": "parlors",
1151 | "particularise": "particularize",
1152 | "particularised": "particularized",
1153 | "particularises": "particularizes",
1154 | "particularising": "particularizing",
1155 | "passivisation": "passivization",
1156 | "passivise": "passivize",
1157 | "passivised": "passivized",
1158 | "passivises": "passivizes",
1159 | "passivising": "passivizing",
1160 | "pasteurisation": "pasteurization",
1161 | "pasteurise": "pasteurize",
1162 | "pasteurised": "pasteurized",
1163 | "pasteurises": "pasteurizes",
1164 | "pasteurising": "pasteurizing",
1165 | "patronise": "patronize",
1166 | "patronised": "patronized",
1167 | "patronises": "patronizes",
1168 | "patronising": "patronizing",
1169 | "patronisingly": "patronizingly",
1170 | "pedalled": "pedaled",
1171 | "pedalling": "pedaling",
1172 | "pedestrianisation": "pedestrianization",
1173 | "pedestrianise": "pedestrianize",
1174 | "pedestrianised": "pedestrianized",
1175 | "pedestrianises": "pedestrianizes",
1176 | "pedestrianising": "pedestrianizing",
1177 | "penalise": "penalize",
1178 | "penalised": "penalized",
1179 | "penalises": "penalizes",
1180 | "penalising": "penalizing",
1181 | "pencilled": "penciled",
1182 | "pencilling": "penciling",
1183 | "personalise": "personalize",
1184 | "personalised": "personalized",
1185 | "personalises": "personalizes",
1186 | "personalising": "personalizing",
1187 | "pharmacopoeia": "pharmacopeia",
1188 | "pharmacopoeias": "pharmacopeias",
1189 | "philosophise": "philosophize",
1190 | "philosophised": "philosophized",
1191 | "philosophises": "philosophizes",
1192 | "philosophising": "philosophizing",
1193 | "philtre": "filter",
1194 | "philtres": "filters",
1195 | "phoney": "phony",
1196 | "plagiarise": "plagiarize",
1197 | "plagiarised": "plagiarized",
1198 | "plagiarises": "plagiarizes",
1199 | "plagiarising": "plagiarizing",
1200 | "plough": "plow",
1201 | "ploughed": "plowed",
1202 | "ploughing": "plowing",
1203 | "ploughman": "plowman",
1204 | "ploughmen": "plowmen",
1205 | "ploughs": "plows",
1206 | "ploughshare": "plowshare",
1207 | "ploughshares": "plowshares",
1208 | "polarisation": "polarization",
1209 | "polarise": "polarize",
1210 | "polarised": "polarized",
1211 | "polarises": "polarizes",
1212 | "polarising": "polarizing",
1213 | "politicisation": "politicization",
1214 | "politicise": "politicize",
1215 | "politicised": "politicized",
1216 | "politicises": "politicizes",
1217 | "politicising": "politicizing",
1218 | "popularisation": "popularization",
1219 | "popularise": "popularize",
1220 | "popularised": "popularized",
1221 | "popularises": "popularizes",
1222 | "popularising": "popularizing",
1223 | "pouffe": "pouf",
1224 | "pouffes": "poufs",
1225 | "practise": "practice",
1226 | "practised": "practiced",
1227 | "practises": "practices",
1228 | "practising": "practicing",
1229 | "praesidium": "presidium",
1230 | "praesidiums": "presidiums",
1231 | "pressurisation": "pressurization",
1232 | "pressurise": "pressurize",
1233 | "pressurised": "pressurized",
1234 | "pressurises": "pressurizes",
1235 | "pressurising": "pressurizing",
1236 | "pretence": "pretense",
1237 | "pretences": "pretenses",
1238 | "primaeval": "primeval",
1239 | "prioritisation": "prioritization",
1240 | "prioritise": "prioritize",
1241 | "prioritised": "prioritized",
1242 | "prioritises": "prioritizes",
1243 | "prioritising": "prioritizing",
1244 | "privatisation": "privatization",
1245 | "privatisations": "privatizations",
1246 | "privatise": "privatize",
1247 | "privatised": "privatized",
1248 | "privatises": "privatizes",
1249 | "privatising": "privatizing",
1250 | "professionalisation": "professionalization",
1251 | "professionalise": "professionalize",
1252 | "professionalised": "professionalized",
1253 | "professionalises": "professionalizes",
1254 | "professionalising": "professionalizing",
1255 | "programme": "program",
1256 | "programmes": "programs",
1257 | "prologue": "prolog",
1258 | "prologues": "prologs",
1259 | "propagandise": "propagandize",
1260 | "propagandised": "propagandized",
1261 | "propagandises": "propagandizes",
1262 | "propagandising": "propagandizing",
1263 | "proselytise": "proselytize",
1264 | "proselytised": "proselytized",
1265 | "proselytiser": "proselytizer",
1266 | "proselytisers": "proselytizers",
1267 | "proselytises": "proselytizes",
1268 | "proselytising": "proselytizing",
1269 | "psychoanalyse": "psychoanalyze",
1270 | "psychoanalysed": "psychoanalyzed",
1271 | "psychoanalyses": "psychoanalyzes",
1272 | "psychoanalysing": "psychoanalyzing",
1273 | "publicise": "publicize",
1274 | "publicised": "publicized",
1275 | "publicises": "publicizes",
1276 | "publicising": "publicizing",
1277 | "pulverisation": "pulverization",
1278 | "pulverise": "pulverize",
1279 | "pulverised": "pulverized",
1280 | "pulverises": "pulverizes",
1281 | "pulverising": "pulverizing",
1282 |     "pummelled": "pummeled",
1283 |     "pummelling": "pummeling",
1284 | "pyjama": "pajama",
1285 | "pyjamas": "pajamas",
1286 | "pzazz": "pizzazz",
1287 | "quarrelled": "quarreled",
1288 | "quarrelling": "quarreling",
1289 | "radicalise": "radicalize",
1290 | "radicalised": "radicalized",
1291 | "radicalises": "radicalizes",
1292 | "radicalising": "radicalizing",
1293 | "rancour": "rancor",
1294 | "randomise": "randomize",
1295 | "randomised": "randomized",
1296 | "randomises": "randomizes",
1297 | "randomising": "randomizing",
1298 | "rationalisation": "rationalization",
1299 | "rationalisations": "rationalizations",
1300 | "rationalise": "rationalize",
1301 | "rationalised": "rationalized",
1302 | "rationalises": "rationalizes",
1303 | "rationalising": "rationalizing",
1304 | "ravelled": "raveled",
1305 | "ravelling": "raveling",
1306 | "realisable": "realizable",
1307 | "realisation": "realization",
1308 | "realisations": "realizations",
1309 | "realise": "realize",
1310 | "realised": "realized",
1311 | "realises": "realizes",
1312 | "realising": "realizing",
1313 | "recognisable": "recognizable",
1314 | "recognisably": "recognizably",
1315 | "recognisance": "recognizance",
1316 | "recognise": "recognize",
1317 | "recognised": "recognized",
1318 | "recognises": "recognizes",
1319 | "recognising": "recognizing",
1320 | "reconnoitre": "reconnoiter",
1321 | "reconnoitred": "reconnoitered",
1322 | "reconnoitres": "reconnoiters",
1323 | "reconnoitring": "reconnoitering",
1324 | "refuelled": "refueled",
1325 | "refuelling": "refueling",
1326 | "regularisation": "regularization",
1327 | "regularise": "regularize",
1328 | "regularised": "regularized",
1329 | "regularises": "regularizes",
1330 | "regularising": "regularizing",
1331 | "remodelled": "remodeled",
1332 | "remodelling": "remodeling",
1333 | "remould": "remold",
1334 | "remoulded": "remolded",
1335 | "remoulding": "remolding",
1336 | "remoulds": "remolds",
1337 | "reorganisation": "reorganization",
1338 | "reorganisations": "reorganizations",
1339 | "reorganise": "reorganize",
1340 | "reorganised": "reorganized",
1341 | "reorganises": "reorganizes",
1342 | "reorganising": "reorganizing",
1343 | "revelled": "reveled",
1344 | "reveller": "reveler",
1345 | "revellers": "revelers",
1346 | "revelling": "reveling",
1347 | "revitalise": "revitalize",
1348 | "revitalised": "revitalized",
1349 | "revitalises": "revitalizes",
1350 | "revitalising": "revitalizing",
1351 | "revolutionise": "revolutionize",
1352 | "revolutionised": "revolutionized",
1353 | "revolutionises": "revolutionizes",
1354 | "revolutionising": "revolutionizing",
1355 | "rhapsodise": "rhapsodize",
1356 | "rhapsodised": "rhapsodized",
1357 | "rhapsodises": "rhapsodizes",
1358 | "rhapsodising": "rhapsodizing",
1359 | "rigour": "rigor",
1360 | "rigours": "rigors",
1361 | "ritualised": "ritualized",
1362 | "rivalled": "rivaled",
1363 | "rivalling": "rivaling",
1364 | "romanticise": "romanticize",
1365 | "romanticised": "romanticized",
1366 | "romanticises": "romanticizes",
1367 | "romanticising": "romanticizing",
1368 | "rumour": "rumor",
1369 | "rumoured": "rumored",
1370 | "rumours": "rumors",
1371 | "sabre": "saber",
1372 | "sabres": "sabers",
1373 | "saltpetre": "saltpeter",
1374 | "sanitise": "sanitize",
1375 | "sanitised": "sanitized",
1376 | "sanitises": "sanitizes",
1377 | "sanitising": "sanitizing",
1378 | "satirise": "satirize",
1379 | "satirised": "satirized",
1380 | "satirises": "satirizes",
1381 | "satirising": "satirizing",
1382 | "saviour": "savior",
1383 | "saviours": "saviors",
1384 | "savour": "savor",
1385 | "savoured": "savored",
1386 | "savouries": "savories",
1387 | "savouring": "savoring",
1388 | "savours": "savors",
1389 | "savoury": "savory",
1390 | "scandalise": "scandalize",
1391 | "scandalised": "scandalized",
1392 | "scandalises": "scandalizes",
1393 | "scandalising": "scandalizing",
1394 | "sceptic": "skeptic",
1395 | "sceptical": "skeptical",
1396 | "sceptically": "skeptically",
1397 | "scepticism": "skepticism",
1398 | "sceptics": "skeptics",
1399 | "sceptre": "scepter",
1400 | "sceptres": "scepters",
1401 | "scrutinise": "scrutinize",
1402 | "scrutinised": "scrutinized",
1403 | "scrutinises": "scrutinizes",
1404 | "scrutinising": "scrutinizing",
1405 | "secularisation": "secularization",
1406 | "secularise": "secularize",
1407 | "secularised": "secularized",
1408 | "secularises": "secularizes",
1409 | "secularising": "secularizing",
1410 | "sensationalise": "sensationalize",
1411 | "sensationalised": "sensationalized",
1412 | "sensationalises": "sensationalizes",
1413 | "sensationalising": "sensationalizing",
1414 | "sensitise": "sensitize",
1415 | "sensitised": "sensitized",
1416 | "sensitises": "sensitizes",
1417 | "sensitising": "sensitizing",
1418 | "sentimentalise": "sentimentalize",
1419 | "sentimentalised": "sentimentalized",
1420 | "sentimentalises": "sentimentalizes",
1421 | "sentimentalising": "sentimentalizing",
1422 | "sepulchre": "sepulcher",
1423 | "sepulchres": "sepulchers",
1424 | "serialisation": "serialization",
1425 | "serialisations": "serializations",
1426 | "serialise": "serialize",
1427 | "serialised": "serialized",
1428 | "serialises": "serializes",
1429 | "serialising": "serializing",
1430 | "sermonise": "sermonize",
1431 | "sermonised": "sermonized",
1432 | "sermonises": "sermonizes",
1433 | "sermonising": "sermonizing",
1434 | "sheikh": "sheik",
1435 | "shovelled": "shoveled",
1436 | "shovelling": "shoveling",
1437 | "shrivelled": "shriveled",
1438 | "shrivelling": "shriveling",
1439 | "signalise": "signalize",
1440 | "signalised": "signalized",
1441 | "signalises": "signalizes",
1442 | "signalising": "signalizing",
1443 | "signalled": "signaled",
1444 | "signalling": "signaling",
1445 | "smoulder": "smolder",
1446 | "smouldered": "smoldered",
1447 | "smouldering": "smoldering",
1448 | "smoulders": "smolders",
1449 | "snivelled": "sniveled",
1450 | "snivelling": "sniveling",
1451 | "snorkelled": "snorkeled",
1452 | "snorkelling": "snorkeling",
1453 | "snowplough": "snowplow",
1454 |     "snowploughs": "snowplows",
1455 | "socialisation": "socialization",
1456 | "socialise": "socialize",
1457 | "socialised": "socialized",
1458 | "socialises": "socializes",
1459 | "socialising": "socializing",
1460 | "sodomise": "sodomize",
1461 | "sodomised": "sodomized",
1462 | "sodomises": "sodomizes",
1463 | "sodomising": "sodomizing",
1464 | "solemnise": "solemnize",
1465 | "solemnised": "solemnized",
1466 | "solemnises": "solemnizes",
1467 | "solemnising": "solemnizing",
1468 | "sombre": "somber",
1469 | "specialisation": "specialization",
1470 | "specialisations": "specializations",
1471 | "specialise": "specialize",
1472 | "specialised": "specialized",
1473 | "specialises": "specializes",
1474 | "specialising": "specializing",
1475 | "spectre": "specter",
1476 | "spectres": "specters",
1477 | "spiralled": "spiraled",
1478 | "spiralling": "spiraling",
1479 | "splendour": "splendor",
1480 | "splendours": "splendors",
1481 | "squirrelled": "squirreled",
1482 | "squirrelling": "squirreling",
1483 | "stabilisation": "stabilization",
1484 | "stabilise": "stabilize",
1485 | "stabilised": "stabilized",
1486 | "stabiliser": "stabilizer",
1487 | "stabilisers": "stabilizers",
1488 | "stabilises": "stabilizes",
1489 | "stabilising": "stabilizing",
1490 | "standardisation": "standardization",
1491 | "standardise": "standardize",
1492 | "standardised": "standardized",
1493 | "standardises": "standardizes",
1494 | "standardising": "standardizing",
1495 | "stencilled": "stenciled",
1496 | "stencilling": "stenciling",
1497 | "sterilisation": "sterilization",
1498 | "sterilisations": "sterilizations",
1499 | "sterilise": "sterilize",
1500 | "sterilised": "sterilized",
1501 | "steriliser": "sterilizer",
1502 | "sterilisers": "sterilizers",
1503 | "sterilises": "sterilizes",
1504 | "sterilising": "sterilizing",
1505 | "stigmatisation": "stigmatization",
1506 | "stigmatise": "stigmatize",
1507 | "stigmatised": "stigmatized",
1508 | "stigmatises": "stigmatizes",
1509 | "stigmatising": "stigmatizing",
1510 | "storey": "story",
1511 | "storeys": "stories",
1512 | "subsidisation": "subsidization",
1513 | "subsidise": "subsidize",
1514 | "subsidised": "subsidized",
1515 | "subsidiser": "subsidizer",
1516 | "subsidisers": "subsidizers",
1517 | "subsidises": "subsidizes",
1518 | "subsidising": "subsidizing",
1519 | "succour": "succor",
1520 | "succoured": "succored",
1521 | "succouring": "succoring",
1522 | "succours": "succors",
1523 | "sulphate": "sulfate",
1524 | "sulphates": "sulfates",
1525 | "sulphide": "sulfide",
1526 | "sulphides": "sulfides",
1527 | "sulphur": "sulfur",
1528 | "sulphurous": "sulfurous",
1529 | "summarise": "summarize",
1530 | "summarised": "summarized",
1531 | "summarises": "summarizes",
1532 | "summarising": "summarizing",
1533 | "swivelled": "swiveled",
1534 | "swivelling": "swiveling",
1535 | "symbolise": "symbolize",
1536 | "symbolised": "symbolized",
1537 | "symbolises": "symbolizes",
1538 | "symbolising": "symbolizing",
1539 | "sympathise": "sympathize",
1540 | "sympathised": "sympathized",
1541 | "sympathiser": "sympathizer",
1542 | "sympathisers": "sympathizers",
1543 | "sympathises": "sympathizes",
1544 | "sympathising": "sympathizing",
1545 | "synchronisation": "synchronization",
1546 | "synchronise": "synchronize",
1547 | "synchronised": "synchronized",
1548 | "synchronises": "synchronizes",
1549 | "synchronising": "synchronizing",
1550 | "synthesise": "synthesize",
1551 | "synthesised": "synthesized",
1552 | "synthesiser": "synthesizer",
1553 | "synthesisers": "synthesizers",
1554 | "synthesises": "synthesizes",
1555 | "synthesising": "synthesizing",
1556 | "syphon": "siphon",
1557 | "syphoned": "siphoned",
1558 | "syphoning": "siphoning",
1559 | "syphons": "siphons",
1560 | "systematisation": "systematization",
1561 | "systematise": "systematize",
1562 | "systematised": "systematized",
1563 | "systematises": "systematizes",
1564 | "systematising": "systematizing",
1565 | "tantalise": "tantalize",
1566 | "tantalised": "tantalized",
1567 | "tantalises": "tantalizes",
1568 | "tantalising": "tantalizing",
1569 | "tantalisingly": "tantalizingly",
1570 | "tasselled": "tasseled",
1571 | "technicolour": "technicolor",
1572 | "temporise": "temporize",
1573 | "temporised": "temporized",
1574 | "temporises": "temporizes",
1575 | "temporising": "temporizing",
1576 | "tenderise": "tenderize",
1577 | "tenderised": "tenderized",
1578 | "tenderises": "tenderizes",
1579 | "tenderising": "tenderizing",
1580 | "terrorise": "terrorize",
1581 | "terrorised": "terrorized",
1582 | "terrorises": "terrorizes",
1583 | "terrorising": "terrorizing",
1584 | "theatre": "theater",
1585 | "theatregoer": "theatergoer",
1586 | "theatregoers": "theatergoers",
1587 | "theatres": "theaters",
1588 | "theorise": "theorize",
1589 | "theorised": "theorized",
1590 | "theorises": "theorizes",
1591 | "theorising": "theorizing",
1592 | "tonne": "ton",
1593 | "tonnes": "tons",
1594 | "towelled": "toweled",
1595 | "towelling": "toweling",
1596 | "toxaemia": "toxemia",
1597 | "tranquillise": "tranquilize",
1598 | "tranquillised": "tranquilized",
1599 | "tranquilliser": "tranquilizer",
1600 | "tranquillisers": "tranquilizers",
1601 | "tranquillises": "tranquilizes",
1602 | "tranquillising": "tranquilizing",
1603 | "tranquillity": "tranquility",
1604 | "tranquillize": "tranquilize",
1605 | "tranquillized": "tranquilized",
1606 | "tranquillizer": "tranquilizer",
1607 | "tranquillizers": "tranquilizers",
1608 | "tranquillizes": "tranquilizes",
1609 | "tranquillizing": "tranquilizing",
1610 | "tranquilly": "tranquility",
1611 | "transistorised": "transistorized",
1612 | "traumatise": "traumatize",
1613 | "traumatised": "traumatized",
1614 | "traumatises": "traumatizes",
1615 | "traumatising": "traumatizing",
1616 | "travelled": "traveled",
1617 | "traveller": "traveler",
1618 | "travellers": "travelers",
1619 | "travelling": "traveling",
1620 | "travelog": "travelogue",
1621 | "travelogs": "travelogues",
1622 | "trialled": "trialed",
1623 | "trialling": "trialing",
1624 | "tricolour": "tricolor",
1625 | "tricolours": "tricolors",
1626 | "trivialise": "trivialize",
1627 | "trivialised": "trivialized",
1628 | "trivialises": "trivializes",
1629 | "trivialising": "trivializing",
1630 | "tumour": "tumor",
1631 | "tumours": "tumors",
1632 | "tunnelled": "tunneled",
1633 | "tunnelling": "tunneling",
1634 | "tyrannise": "tyrannize",
1635 | "tyrannised": "tyrannized",
1636 | "tyrannises": "tyrannizes",
1637 | "tyrannising": "tyrannizing",
1638 | "tyre": "tire",
1639 | "tyres": "tires",
1640 | "unauthorised": "unauthorized",
1641 | "uncivilised": "uncivilized",
1642 | "underutilised": "underutilized",
1643 | "unequalled": "unequaled",
1644 | "unfavourable": "unfavorable",
1645 | "unfavourably": "unfavorably",
1646 | "unionisation": "unionization",
1647 | "unionise": "unionize",
1648 | "unionised": "unionized",
1649 | "unionises": "unionizes",
1650 | "unionising": "unionizing",
1651 | "unorganised": "unorganized",
1652 | "unravelled": "unraveled",
1653 | "unravelling": "unraveling",
1654 | "unrecognisable": "unrecognizable",
1655 | "unrecognised": "unrecognized",
1656 | "unrivalled": "unrivaled",
1657 | "unsavoury": "unsavory",
1658 | "untrammelled": "untrammeled",
1659 | "urbanisation": "urbanization",
1660 | "urbanise": "urbanize",
1661 | "urbanised": "urbanized",
1662 | "urbanises": "urbanizes",
1663 | "urbanising": "urbanizing",
1664 | "utilisable": "utilizable",
1665 | "utilisation": "utilization",
1666 | "utilise": "utilize",
1667 | "utilised": "utilized",
1668 | "utilises": "utilizes",
1669 | "utilising": "utilizing",
1670 | "valour": "valor",
1671 | "vandalise": "vandalize",
1672 | "vandalised": "vandalized",
1673 | "vandalises": "vandalizes",
1674 | "vandalising": "vandalizing",
1675 | "vaporisation": "vaporization",
1676 | "vaporise": "vaporize",
1677 | "vaporised": "vaporized",
1678 | "vaporises": "vaporizes",
1679 | "vaporising": "vaporizing",
1680 | "vapour": "vapor",
1681 | "vapours": "vapors",
1682 | "verbalise": "verbalize",
1683 | "verbalised": "verbalized",
1684 | "verbalises": "verbalizes",
1685 | "verbalising": "verbalizing",
1686 | "victimisation": "victimization",
1687 | "victimise": "victimize",
1688 | "victimised": "victimized",
1689 | "victimises": "victimizes",
1690 | "victimising": "victimizing",
1691 | "videodisc": "videodisk",
1692 | "videodiscs": "videodisks",
1693 | "vigour": "vigor",
1694 | "visualisation": "visualization",
1695 | "visualisations": "visualizations",
1696 | "visualise": "visualize",
1697 | "visualised": "visualized",
1698 | "visualises": "visualizes",
1699 | "visualising": "visualizing",
1700 | "vocalisation": "vocalization",
1701 | "vocalisations": "vocalizations",
1702 | "vocalise": "vocalize",
1703 | "vocalised": "vocalized",
1704 | "vocalises": "vocalizes",
1705 | "vocalising": "vocalizing",
1706 | "vulcanised": "vulcanized",
1707 | "vulgarisation": "vulgarization",
1708 | "vulgarise": "vulgarize",
1709 | "vulgarised": "vulgarized",
1710 | "vulgarises": "vulgarizes",
1711 | "vulgarising": "vulgarizing",
1712 | "waggon": "wagon",
1713 | "waggons": "wagons",
1714 | "watercolour": "watercolor",
1715 | "watercolours": "watercolors",
1716 | "weaselled": "weaseled",
1717 | "weaselling": "weaseling",
1718 | "westernisation": "westernization",
1719 | "westernise": "westernize",
1720 | "westernised": "westernized",
1721 | "westernises": "westernizes",
1722 | "westernising": "westernizing",
1723 | "womanise": "womanize",
1724 | "womanised": "womanized",
1725 | "womaniser": "womanizer",
1726 | "womanisers": "womanizers",
1727 | "womanises": "womanizes",
1728 | "womanising": "womanizing",
1729 | "woollen": "woolen",
1730 | "woollens": "woolens",
1731 | "woollies": "woolies",
1732 | "woolly": "wooly",
1733 | "worshipped": "worshiped",
1734 | "worshipper": "worshiper",
1735 | "worshipping": "worshiping",
1736 | "yodelled": "yodeled",
1737 | "yodelling": "yodeling",
1738 | "yoghourt": "yogurt",
1739 | "yoghourts": "yogurts",
1740 | "yoghurt": "yogurt",
1741 | "yoghurts": "yogurts"
1742 | }
1743 |
--------------------------------------------------------------------------------
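The table above is the spelling-normalization mapping (largely British to American forms) that benchmark/wer_benchmark.py passes to Whisper's EnglishTextNormalizer before computing WER. A minimal sketch of that use, assuming it is run from the repository root:

    import json

    from transformers.models.whisper.english_normalizer import EnglishTextNormalizer

    # Build the normalizer from the mapping shown above.
    with open("benchmark/normalizer.json") as f:
        normalizer = EnglishTextNormalizer(json.load(f))

    # British spellings are rewritten to their American counterparts,
    # e.g. "analysed" -> "analyzed", "colour" -> "color".
    print(normalizer("They analysed the colour of the theatre"))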
/benchmark/requirements.benchmark.txt:
--------------------------------------------------------------------------------
1 | transformers
2 | jiwer
3 | datasets
4 | memory_profiler
5 | py3nvml
6 | pytubefix
7 |
--------------------------------------------------------------------------------
/benchmark/speed_benchmark.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import timeit
3 |
4 | from typing import Callable
5 |
6 | from utils import inference
7 |
8 | parser = argparse.ArgumentParser(description="Speed benchmark")
9 | parser.add_argument(
10 | "--repeat",
11 | type=int,
12 | default=3,
13 | help="Times an experiment will be run.",
14 | )
15 | args = parser.parse_args()
16 |
17 |
18 | def measure_speed(func: Callable[[], None]):
19 |     # As noted in https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat,
20 |     # the min should be taken rather than the average.
21 | runtimes = timeit.repeat(
22 | func,
23 | repeat=args.repeat,
24 | number=10,
25 | )
26 | print(runtimes)
27 | print("Min execution time: %.3fs" % (min(runtimes) / 10.0))
28 |
29 |
30 | if __name__ == "__main__":
31 | measure_speed(inference)
32 |
--------------------------------------------------------------------------------
/benchmark/utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from threading import Thread
4 | from typing import Optional
5 |
6 | from faster_whisper import WhisperModel
7 |
8 | model_path = "large-v3"
9 | model = WhisperModel(model_path, device="cuda")
10 |
11 |
12 | def inference():
13 | segments, info = model.transcribe("benchmark.m4a", language="fr")
14 | for segment in segments:
15 | print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
16 |
17 |
18 | def get_logger(name: Optional[str] = None) -> logging.Logger:
19 | formatter = logging.Formatter("%(levelname)s: %(message)s")
20 | logger = logging.getLogger(name)
21 | logger.setLevel(logging.DEBUG)
22 | handler = logging.StreamHandler()
23 | handler.setFormatter(formatter)
24 | logger.addHandler(handler)
25 | return logger
26 |
27 |
28 | class MyThread(Thread):
29 | def __init__(self, func, params):
30 |         super().__init__()
31 | self.func = func
32 | self.params = params
33 | self.result = None
34 |
35 | def run(self):
36 | self.result = self.func(*self.params)
37 |
38 | def get_result(self):
39 | return self.result
40 |
--------------------------------------------------------------------------------
/benchmark/wer_benchmark.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 |
5 | from datasets import load_dataset
6 | from jiwer import wer
7 | from tqdm import tqdm
8 | from transformers.models.whisper.english_normalizer import EnglishTextNormalizer
9 |
10 | from faster_whisper import WhisperModel
11 |
12 | parser = argparse.ArgumentParser(description="WER benchmark")
13 | parser.add_argument(
14 | "--audio_numb",
15 | type=int,
16 | default=None,
17 | help="Specify the number of validation audio files in the dataset."
18 | " Set to None to retrieve all audio files.",
19 | )
20 | args = parser.parse_args()
21 |
22 | model_path = "large-v3"
23 | model = WhisperModel(model_path, device="cuda")
24 |
25 | # load the dataset with streaming mode
26 | dataset = load_dataset("librispeech_asr", "clean", split="validation", streaming=True)
27 |
28 | with open(os.path.join(os.path.dirname(__file__), "normalizer.json"), "r") as f:
29 | normalizer = EnglishTextNormalizer(json.load(f))
30 |
31 |
32 | def inference(batch):
33 | batch["transcription"] = []
34 | for sample in batch["audio"]:
35 | segments, info = model.transcribe(sample["array"], language="en")
36 | batch["transcription"].append("".join([segment.text for segment in segments]))
37 | batch["reference"] = batch["text"]
38 | return batch
39 |
40 |
41 | dataset = dataset.map(function=inference, batched=True, batch_size=16)
42 |
43 | all_transcriptions = []
44 | all_references = []
45 |
46 | # iterate over the dataset and run inference
47 | for i, result in tqdm(enumerate(dataset), desc="Evaluating..."):
48 | all_transcriptions.append(result["transcription"])
49 | all_references.append(result["reference"])
50 | if args.audio_numb and i == (args.audio_numb - 1):
51 | break
52 |
53 | # normalize predictions and references
54 | all_transcriptions = [normalizer(transcription) for transcription in all_transcriptions]
55 | all_references = [normalizer(reference) for reference in all_references]
56 |
57 | # compute the WER metric
58 | word_error_rate = 100 * wer(hypothesis=all_transcriptions, reference=all_references)
59 | print("WER: %.3f" % word_error_rate)
60 |
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04
2 | WORKDIR /root
3 | RUN apt-get update -y && apt-get install -y python3-pip
4 | COPY infer.py jfk.flac ./
5 | RUN pip3 install faster-whisper
6 | CMD ["python3", "infer.py"]
7 |
--------------------------------------------------------------------------------
/docker/infer.py:
--------------------------------------------------------------------------------
1 | from faster_whisper import WhisperModel
2 |
3 | jfk_path = "jfk.flac"
4 | model = WhisperModel("tiny", device="cuda")
5 | segments, info = model.transcribe(jfk_path, word_timestamps=True)
6 | for segment in segments:
7 | print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
8 |
--------------------------------------------------------------------------------
/docker/jfk.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/docker/jfk.flac
--------------------------------------------------------------------------------
/faster_whisper/__init__.py:
--------------------------------------------------------------------------------
1 | from faster_whisper.audio import decode_audio
2 | from faster_whisper.transcribe import BatchedInferencePipeline, WhisperModel
3 | from faster_whisper.utils import available_models, download_model, format_timestamp
4 | from faster_whisper.version import __version__
5 |
6 | __all__ = [
7 | "available_models",
8 | "decode_audio",
9 | "WhisperModel",
10 | "BatchedInferencePipeline",
11 | "download_model",
12 | "format_timestamp",
13 | "__version__",
14 | ]
15 |
--------------------------------------------------------------------------------
/faster_whisper/assets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/faster_whisper/assets/__init__.py
--------------------------------------------------------------------------------
/faster_whisper/assets/silero_decoder_v5.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/faster_whisper/assets/silero_decoder_v5.onnx
--------------------------------------------------------------------------------
/faster_whisper/assets/silero_encoder_v5.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/faster_whisper/assets/silero_encoder_v5.onnx
--------------------------------------------------------------------------------
/faster_whisper/audio.py:
--------------------------------------------------------------------------------
1 | """We use the PyAV library to decode the audio: https://github.com/PyAV-Org/PyAV
2 |
3 | The advantage of PyAV is that it bundles the FFmpeg libraries, so there are no additional
4 | system dependencies: FFmpeg does not need to be installed on the system.
5 |
6 | However, the API is quite low-level so we need to manipulate audio frames directly.
7 | """
8 |
9 | import gc
10 | import io
11 | import itertools
12 |
13 | from typing import BinaryIO, Union
14 |
15 | import av
16 | import numpy as np
17 |
18 |
19 | def decode_audio(
20 | input_file: Union[str, BinaryIO],
21 | sampling_rate: int = 16000,
22 | split_stereo: bool = False,
23 | ):
24 | """Decodes the audio.
25 |
26 | Args:
27 | input_file: Path to the input file or a file-like object.
28 | sampling_rate: Resample the audio to this sample rate.
29 | split_stereo: Return separate left and right channels.
30 |
31 | Returns:
32 | A float32 Numpy array.
33 |
34 | If `split_stereo` is enabled, the function returns a 2-tuple with the
35 | separated left and right channels.
36 | """
37 | resampler = av.audio.resampler.AudioResampler(
38 | format="s16",
39 | layout="mono" if not split_stereo else "stereo",
40 | rate=sampling_rate,
41 | )
42 |
43 | raw_buffer = io.BytesIO()
44 | dtype = None
45 |
46 | with av.open(input_file, mode="r", metadata_errors="ignore") as container:
47 | frames = container.decode(audio=0)
48 | frames = _ignore_invalid_frames(frames)
49 | frames = _group_frames(frames, 500000)
50 | frames = _resample_frames(frames, resampler)
51 |
52 | for frame in frames:
53 | array = frame.to_ndarray()
54 | dtype = array.dtype
55 | raw_buffer.write(array)
56 |
57 | # It appears that some objects related to the resampler are not freed
58 | # unless the garbage collector is manually run.
59 | # https://github.com/SYSTRAN/faster-whisper/issues/390
60 |     # Note that this slows down loading the audio a little bit.
61 |     # If that is a concern, use ffmpeg directly, as done here:
62 | # https://github.com/openai/whisper/blob/25639fc/whisper/audio.py#L25-L62
63 | del resampler
64 | gc.collect()
65 |
66 | audio = np.frombuffer(raw_buffer.getbuffer(), dtype=dtype)
67 |
68 | # Convert s16 back to f32.
69 | audio = audio.astype(np.float32) / 32768.0
70 |
71 | if split_stereo:
72 | left_channel = audio[0::2]
73 | right_channel = audio[1::2]
74 | return left_channel, right_channel
75 |
76 | return audio
77 |
78 |
79 | def _ignore_invalid_frames(frames):
80 | iterator = iter(frames)
81 |
82 | while True:
83 | try:
84 | yield next(iterator)
85 | except StopIteration:
86 | break
87 | except av.error.InvalidDataError:
88 | continue
89 |
90 |
91 | def _group_frames(frames, num_samples=None):
92 | fifo = av.audio.fifo.AudioFifo()
93 |
94 | for frame in frames:
95 | frame.pts = None # Ignore timestamp check.
96 | fifo.write(frame)
97 |
98 | if num_samples is not None and fifo.samples >= num_samples:
99 | yield fifo.read()
100 |
101 | if fifo.samples > 0:
102 | yield fifo.read()
103 |
104 |
105 | def _resample_frames(frames, resampler):
106 | # Add None to flush the resampler.
107 | for frame in itertools.chain(frames, [None]):
108 | yield from resampler.resample(frame)
109 |
110 |
111 | def pad_or_trim(array, length: int = 3000, *, axis: int = -1):
112 | """
113 |     Pad or trim the Mel features array to `length` frames, as expected by the encoder.
114 | """
115 | if array.shape[axis] > length:
116 | array = array.take(indices=range(length), axis=axis)
117 |
118 | if array.shape[axis] < length:
119 | pad_widths = [(0, 0)] * array.ndim
120 | pad_widths[axis] = (0, length - array.shape[axis])
121 | array = np.pad(array, pad_widths)
122 |
123 | return array
124 |
--------------------------------------------------------------------------------
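A minimal sketch of calling decode_audio directly (the file name is hypothetical; any container/codec FFmpeg understands will work):

    from faster_whisper.audio import decode_audio

    # Mono decoding, resampled to the default 16 kHz.
    audio = decode_audio("example.wav")

    # With split_stereo=True the left and right channels are returned separately.
    left, right = decode_audio("example.wav", split_stereo=True)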
/faster_whisper/feature_extractor.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class FeatureExtractor:
5 | def __init__(
6 | self,
7 | feature_size=80,
8 | sampling_rate=16000,
9 | hop_length=160,
10 | chunk_length=30,
11 | n_fft=400,
12 | ):
13 | self.n_fft = n_fft
14 | self.hop_length = hop_length
15 | self.chunk_length = chunk_length
16 | self.n_samples = chunk_length * sampling_rate
17 | self.nb_max_frames = self.n_samples // hop_length
18 | self.time_per_frame = hop_length / sampling_rate
19 | self.sampling_rate = sampling_rate
20 | self.mel_filters = self.get_mel_filters(
21 | sampling_rate, n_fft, n_mels=feature_size
22 | ).astype("float32")
23 |
24 | @staticmethod
25 | def get_mel_filters(sr, n_fft, n_mels=128):
26 | # Initialize the weights
27 | n_mels = int(n_mels)
28 |
29 | # Center freqs of each FFT bin
30 | fftfreqs = np.fft.rfftfreq(n=n_fft, d=1.0 / sr)
31 |
32 | # 'Center freqs' of mel bands - uniformly spaced between limits
33 | min_mel = 0.0
34 | max_mel = 45.245640471924965
35 |
36 | mels = np.linspace(min_mel, max_mel, n_mels + 2)
37 |
38 | # Fill in the linear scale
39 | f_min = 0.0
40 | f_sp = 200.0 / 3
41 | freqs = f_min + f_sp * mels
42 |
43 | # And now the nonlinear scale
44 | min_log_hz = 1000.0 # beginning of log region (Hz)
45 | min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels)
46 | logstep = np.log(6.4) / 27.0 # step size for log region
47 |
48 | # If we have vector data, vectorize
49 | log_t = mels >= min_log_mel
50 | freqs[log_t] = min_log_hz * np.exp(logstep * (mels[log_t] - min_log_mel))
51 |
52 | fdiff = np.diff(freqs)
53 | ramps = freqs.reshape(-1, 1) - fftfreqs.reshape(1, -1)
54 |
55 | lower = -ramps[:-2] / np.expand_dims(fdiff[:-1], axis=1)
56 | upper = ramps[2:] / np.expand_dims(fdiff[1:], axis=1)
57 |
58 | # Intersect them with each other and zero, vectorized across all i
59 | weights = np.maximum(np.zeros_like(lower), np.minimum(lower, upper))
60 |
61 | # Slaney-style mel is scaled to be approx constant energy per channel
62 | enorm = 2.0 / (freqs[2 : n_mels + 2] - freqs[:n_mels])
63 | weights *= np.expand_dims(enorm, axis=1)
64 |
65 | return weights
66 |
67 | @staticmethod
68 | def stft(
69 | input_array: np.ndarray,
70 | n_fft: int,
71 | hop_length: int = None,
72 | win_length: int = None,
73 | window: np.ndarray = None,
74 | center: bool = True,
75 | mode: str = "reflect",
76 | normalized: bool = False,
77 | onesided: bool = None,
78 | return_complex: bool = None,
79 | ):
80 | # Default initialization for hop_length and win_length
81 | hop_length = hop_length if hop_length is not None else n_fft // 4
82 | win_length = win_length if win_length is not None else n_fft
83 | input_is_complex = np.iscomplexobj(input_array)
84 |
85 | # Determine if the output should be complex
86 | return_complex = (
87 | return_complex
88 | if return_complex is not None
89 | else (input_is_complex or (window is not None and np.iscomplexobj(window)))
90 | )
91 |
92 | if not return_complex and return_complex is None:
93 | raise ValueError(
94 | "stft requires the return_complex parameter for real inputs."
95 | )
96 |
97 | # Input checks
98 | if not np.issubdtype(input_array.dtype, np.floating) and not input_is_complex:
99 | raise ValueError(
100 | "stft: expected an array of floating point or complex values,"
101 | f" got {input_array.dtype}"
102 | )
103 |
104 | if input_array.ndim > 2 or input_array.ndim < 1:
105 | raise ValueError(
106 | f"stft: expected a 1D or 2D array, but got {input_array.ndim}D array"
107 | )
108 |
109 | # Handle 1D input
110 | if input_array.ndim == 1:
111 | input_array = np.expand_dims(input_array, axis=0)
112 | input_array_1d = True
113 | else:
114 | input_array_1d = False
115 |
116 | # Center padding if required
117 | if center:
118 | pad_amount = n_fft // 2
119 | input_array = np.pad(
120 | input_array, ((0, 0), (pad_amount, pad_amount)), mode=mode
121 | )
122 |
123 | batch, length = input_array.shape
124 |
125 | # Additional input checks
126 | if n_fft <= 0 or n_fft > length:
127 | raise ValueError(
128 | f"stft: expected 0 < n_fft <= {length}, but got n_fft={n_fft}"
129 | )
130 |
131 | if hop_length <= 0:
132 | raise ValueError(
133 | f"stft: expected hop_length > 0, but got hop_length={hop_length}"
134 | )
135 |
136 | if win_length <= 0 or win_length > n_fft:
137 | raise ValueError(
138 | f"stft: expected 0 < win_length <= n_fft, but got win_length={win_length}"
139 | )
140 |
141 | if window is not None:
142 | if window.ndim != 1 or window.shape[0] != win_length:
143 | raise ValueError(
144 | f"stft: expected a 1D window array of size equal to win_length={win_length}, "
145 | f"but got window with size {window.shape}"
146 | )
147 |
148 | # Handle padding of the window if necessary
149 | if win_length < n_fft:
150 | left = (n_fft - win_length) // 2
151 | window_ = np.zeros(n_fft, dtype=window.dtype)
152 | window_[left : left + win_length] = window
153 | else:
154 | window_ = window
155 |
156 | # Calculate the number of frames
157 | n_frames = 1 + (length - n_fft) // hop_length
158 |
159 | # Time to columns
160 | input_array = np.lib.stride_tricks.as_strided(
161 | input_array,
162 | (batch, n_frames, n_fft),
163 | (
164 | input_array.strides[0],
165 | hop_length * input_array.strides[1],
166 | input_array.strides[1],
167 | ),
168 | )
169 |
170 |         if window is not None:
171 | input_array = input_array * window_
172 |
173 | # FFT and transpose
174 | complex_fft = input_is_complex
175 | onesided = onesided if onesided is not None else not complex_fft
176 |
177 | if normalized:
178 | norm = "ortho"
179 | else:
180 | norm = None
181 |
182 | if complex_fft:
183 | if onesided:
184 | raise ValueError(
185 | "Cannot have onesided output if window or input is complex"
186 | )
187 | output = np.fft.fft(input_array, n=n_fft, axis=-1, norm=norm)
188 | else:
189 | output = np.fft.rfft(input_array, n=n_fft, axis=-1, norm=norm)
190 |
191 | output = output.transpose((0, 2, 1))
192 |
193 | if input_array_1d:
194 | output = output.squeeze(0)
195 |
196 | return output if return_complex else np.real(output)
197 |
198 | def __call__(self, waveform: np.ndarray, padding=160, chunk_length=None):
199 | """
200 | Compute the log-Mel spectrogram of the provided audio.
201 | """
202 |
203 | if chunk_length is not None:
204 | self.n_samples = chunk_length * self.sampling_rate
205 | self.nb_max_frames = self.n_samples // self.hop_length
206 |
207 |         if waveform.dtype != np.float32:
208 | waveform = waveform.astype(np.float32)
209 |
210 | if padding:
211 | waveform = np.pad(waveform, (0, padding))
212 |
213 | window = np.hanning(self.n_fft + 1)[:-1].astype("float32")
214 |
215 | stft = self.stft(
216 | waveform,
217 | self.n_fft,
218 | self.hop_length,
219 | window=window,
220 | return_complex=True,
221 | ).astype("complex64")
222 | magnitudes = np.abs(stft[..., :-1]) ** 2
223 |
224 | mel_spec = self.mel_filters @ magnitudes
225 |
226 | log_spec = np.log10(np.clip(mel_spec, a_min=1e-10, a_max=None))
227 | log_spec = np.maximum(log_spec, log_spec.max() - 8.0)
228 | log_spec = (log_spec + 4.0) / 4.0
229 |
230 | return log_spec
231 |
--------------------------------------------------------------------------------
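A minimal sketch of turning decoded audio into the padded log-Mel features the encoder expects (the file name is hypothetical, and feature_size=80 matches the default here; large-v3 checkpoints use 128 mel bins):

    from faster_whisper.audio import decode_audio, pad_or_trim
    from faster_whisper.feature_extractor import FeatureExtractor

    audio = decode_audio("example.wav")            # float32 waveform at 16 kHz
    extractor = FeatureExtractor(feature_size=80)  # 80 mel bins
    features = pad_or_trim(extractor(audio))       # log-Mel matrix of shape (80, 3000)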
/faster_whisper/tokenizer.py:
--------------------------------------------------------------------------------
1 | import string
2 |
3 | from functools import cached_property
4 | from typing import List, Optional, Tuple
5 |
6 | import tokenizers
7 |
8 |
9 | class Tokenizer:
10 | """Simple wrapper around a tokenizers.Tokenizer."""
11 |
12 | def __init__(
13 | self,
14 | tokenizer: tokenizers.Tokenizer,
15 | multilingual: bool,
16 | task: Optional[str] = None,
17 | language: Optional[str] = None,
18 | ):
19 | self.tokenizer = tokenizer
20 |
21 | if multilingual:
22 | if task not in _TASKS:
23 | raise ValueError(
24 | "'%s' is not a valid task (accepted tasks: %s)"
25 | % (task, ", ".join(_TASKS))
26 | )
27 |
28 | if language not in _LANGUAGE_CODES:
29 | raise ValueError(
30 | "'%s' is not a valid language code (accepted language codes: %s)"
31 | % (language, ", ".join(_LANGUAGE_CODES))
32 | )
33 |
34 | self.task = self.tokenizer.token_to_id("<|%s|>" % task)
35 | self.language = self.tokenizer.token_to_id("<|%s|>" % language)
36 | self.language_code = language
37 | else:
38 | self.task = None
39 | self.language = None
40 | self.language_code = "en"
41 |
42 | @cached_property
43 | def transcribe(self) -> int:
44 | return self.tokenizer.token_to_id("<|transcribe|>")
45 |
46 | @cached_property
47 | def translate(self) -> int:
48 | return self.tokenizer.token_to_id("<|translate|>")
49 |
50 | @cached_property
51 | def sot(self) -> int:
52 | return self.tokenizer.token_to_id("<|startoftranscript|>")
53 |
54 | @cached_property
55 | def sot_lm(self) -> int:
56 | return self.tokenizer.token_to_id("<|startoflm|>")
57 |
58 | @cached_property
59 | def sot_prev(self) -> int:
60 | return self.tokenizer.token_to_id("<|startofprev|>")
61 |
62 | @cached_property
63 | def eot(self) -> int:
64 | return self.tokenizer.token_to_id("<|endoftext|>")
65 |
66 | @cached_property
67 | def no_timestamps(self) -> int:
68 | return self.tokenizer.token_to_id("<|notimestamps|>")
69 |
70 | @property
71 | def timestamp_begin(self) -> int:
72 | return self.no_timestamps + 1
73 |
74 | @property
75 | def sot_sequence(self) -> List[int]:
76 | sequence = [self.sot]
77 |
78 | if self.language is not None:
79 | sequence.append(self.language)
80 |
81 | if self.task is not None:
82 | sequence.append(self.task)
83 |
84 | return sequence
85 |
86 | def encode(self, text: str) -> List[int]:
87 | return self.tokenizer.encode(text, add_special_tokens=False).ids
88 |
89 | def decode(self, tokens: List[int]) -> str:
90 | text_tokens = [token for token in tokens if token < self.eot]
91 | return self.tokenizer.decode(text_tokens)
92 |
93 | def decode_with_timestamps(self, tokens: List[int]) -> str:
94 | outputs = [[]]
95 |
96 | for token in tokens:
97 | if token >= self.timestamp_begin:
98 | timestamp = f"<|{(token - self.timestamp_begin) * 0.02:.2f}|>"
99 | outputs.append(timestamp)
100 | outputs.append([])
101 | else:
102 | outputs[-1].append(token)
103 |
104 | return "".join(
105 | [s if isinstance(s, str) else self.tokenizer.decode(s) for s in outputs]
106 | )
107 |
108 | @cached_property
109 |     def non_speech_tokens(self) -> Tuple[int, ...]:
110 | """
111 | Returns the list of tokens to suppress in order to avoid any speaker tags or non-speech
112 | annotations, to prevent sampling texts that are not actually spoken in the audio, e.g.
113 |
114 | - ♪♪♪
115 | - ( SPEAKING FOREIGN LANGUAGE )
116 | - [DAVID] Hey there,
117 |
118 |         keeping basic punctuation like commas, periods, question marks, exclamation points, etc.
119 | """
120 |         symbols = list('"#()*+/:;<=>@[\\]^_`{|}~「」『』')
121 |         symbols += (
122 |             "<< >> <<< >>> -- --- -( -[ (' (\" (( )) ((( ))) [[ ]] {{ }} ♪♪ ♪♪♪".split()
123 |         )
124 | 
125 |         # symbols that may be a single token or multiple tokens depending on the tokenizer.
126 |         # In case they're multiple tokens, suppress the first token, which is safe because:
127 |         # These are between U+2640 and U+267F miscellaneous symbols that are okay to suppress
128 |         # in generations, and in the 3-byte UTF-8 representation they share the first two bytes.
129 |         miscellaneous = set("♩♪♫♬♭♮♯")
130 |         assert all(0x2640 <= ord(c) <= 0x267F for c in miscellaneous)
131 | 
132 |         # allow hyphens "-" and single quotes "'" between words, but not at the beginning of a word
133 |         result = {self.encode(" -")[0], self.encode(" '")[0]}
134 |         for symbol in symbols + list(miscellaneous):
135 |             for tokens in [
136 |                 self.encode(symbol),
137 |                 self.encode(" " + symbol),
138 |             ]:
139 |                 if len(tokens) == 1 or symbol in miscellaneous:
140 |                     result.add(tokens[0])
141 | 
142 |         return tuple(sorted(result))
143 | 
144 |     def split_to_word_tokens(
145 |         self, tokens: List[int]
146 |     ) -> Tuple[List[str], List[List[int]]]:
147 |         if self.language_code in {"zh", "ja", "th", "lo", "my", "yue"}:
148 |             # These languages don't typically use spaces, so it is difficult to split words
149 |             # without morpheme analysis. Here, we instead split words at any
150 |             # position where the tokens are decoded as valid unicode points
151 |             return self.split_tokens_on_unicode(tokens)
152 | 
153 |         return self.split_tokens_on_spaces(tokens)
154 | 
155 |     def split_tokens_on_unicode(
156 |         self, tokens: List[int]
157 |     ) -> Tuple[List[str], List[List[int]]]:
158 |         decoded_full = self.decode_with_timestamps(tokens)
159 |         replacement_char = "\ufffd"
160 | 
161 |         words = []
162 |         word_tokens = []
163 |         current_tokens = []
164 |         unicode_offset = 0
165 | 
166 |         for token in tokens:
167 |             current_tokens.append(token)
168 |             decoded = self.decode_with_timestamps(current_tokens)
169 | 
170 |             try:
171 |                 replacement_char_index = decoded.index(replacement_char)
172 |                 replacement_char_index += unicode_offset
173 |             except ValueError:
174 |                 replacement_char_index = None
175 | 
176 |             if replacement_char_index is None or (
177 |                 replacement_char_index < len(decoded_full)
178 |                 and decoded_full[replacement_char_index] == replacement_char
179 |             ):
180 |                 words.append(decoded)
181 |                 word_tokens.append(current_tokens)
182 |                 current_tokens = []
183 |                 unicode_offset += len(decoded)
184 | 
185 |         return words, word_tokens
186 | 
187 |     def split_tokens_on_spaces(
188 |         self, tokens: List[int]
189 |     ) -> Tuple[List[str], List[List[int]]]:
190 |         subwords, subword_tokens_list = self.split_tokens_on_unicode(tokens)
191 |         words = []
192 |         word_tokens = []
193 | 
194 |         for subword, subword_tokens in zip(subwords, subword_tokens_list):
195 |             special = subword_tokens[0] >= self.eot
196 |             with_space = subword.startswith(" ")
197 |             punctuation = subword.strip() in string.punctuation
198 |             if special or with_space or punctuation or len(words) == 0:
199 |                 words.append(subword)
200 |                 word_tokens.append(subword_tokens)
201 |             else:
202 |                 words[-1] = words[-1] + subword
203 |                 word_tokens[-1].extend(subword_tokens)
204 | 
205 |         return words, word_tokens
206 |
207 |
208 | _TASKS = (
209 |     "transcribe",
210 |     "translate",
211 | )
212 | 
213 | _LANGUAGE_CODES = (
214 |     "af",
215 |     "am",
216 |     "ar",
217 |     "as",
218 |     "az",
219 |     "ba",
220 |     "be",
221 |     "bg",
222 |     "bn",
223 |     "bo",
224 |     "br",
225 |     "bs",
226 |     "ca",
227 |     "cs",
228 |     "cy",
229 |     "da",
230 |     "de",
231 |     "el",
232 |     "en",
233 |     "es",
234 |     "et",
235 |     "eu",
236 |     "fa",
237 |     "fi",
238 |     "fo",
239 |     "fr",
240 |     "gl",
241 |     "gu",
242 |     "ha",
243 |     "haw",
244 |     "he",
245 |     "hi",
246 |     "hr",
247 |     "ht",
248 |     "hu",
249 |     "hy",
250 |     "id",
251 |     "is",
252 |     "it",
253 |     "ja",
254 |     "jw",
255 |     "ka",
256 |     "kk",
257 |     "km",
258 |     "kn",
259 |     "ko",
260 |     "la",
261 |     "lb",
262 |     "ln",
263 |     "lo",
264 |     "lt",
265 |     "lv",
266 |     "mg",
267 |     "mi",
268 |     "mk",
269 |     "ml",
270 |     "mn",
271 |     "mr",
272 |     "ms",
273 |     "mt",
274 |     "my",
275 |     "ne",
276 |     "nl",
277 |     "nn",
278 |     "no",
279 |     "oc",
280 |     "pa",
281 |     "pl",
282 |     "ps",
283 |     "pt",
284 |     "ro",
285 |     "ru",
286 |     "sa",
287 |     "sd",
288 |     "si",
289 |     "sk",
290 |     "sl",
291 |     "sn",
292 |     "so",
293 |     "sq",
294 |     "sr",
295 |     "su",
296 |     "sv",
297 |     "sw",
298 |     "ta",
299 |     "te",
300 |     "tg",
301 |     "th",
302 |     "tk",
303 |     "tl",
304 |     "tr",
305 |     "tt",
306 |     "uk",
307 |     "ur",
308 |     "uz",
309 |     "vi",
310 |     "yi",
311 |     "yo",
312 |     "zh",
313 |     "yue",
314 | )
315 |
--------------------------------------------------------------------------------
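The methods above cover the full round trip: encode/decode map between text and token ids, decode_with_timestamps renders timestamp tokens as <|t|> markers, and split_to_word_tokens regroups tokens into word-level units (falling back to unicode-point splitting for languages written without spaces). A minimal sketch of how they fit together, assuming the "tiny" model can be downloaded or is already cached and using an arbitrary example sentence:

    from faster_whisper import WhisperModel
    from faster_whisper.tokenizer import Tokenizer

    # assumes the "tiny" model is downloadable or already in the local cache
    model = WhisperModel("tiny")
    tokenizer = Tokenizer(
        model.hf_tokenizer, multilingual=True, task="transcribe", language="en"
    )

    tokens = tokenizer.encode(" ask not what your country can do for you")
    print(tokenizer.decode(tokens))             # round-trips back to the input text
    words, word_tokens = tokenizer.split_to_word_tokens(tokens)
    print(words)                                # word-level pieces, each keeping its leading space
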
/faster_whisper/utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import re
4 |
5 | from typing import List, Optional, Union
6 |
7 | import huggingface_hub
8 | import requests
9 |
10 | from tqdm.auto import tqdm
11 |
12 | _MODELS = {
13 | "tiny.en": "Systran/faster-whisper-tiny.en",
14 | "tiny": "Systran/faster-whisper-tiny",
15 | "base.en": "Systran/faster-whisper-base.en",
16 | "base": "Systran/faster-whisper-base",
17 | "small.en": "Systran/faster-whisper-small.en",
18 | "small": "Systran/faster-whisper-small",
19 | "medium.en": "Systran/faster-whisper-medium.en",
20 | "medium": "Systran/faster-whisper-medium",
21 | "large-v1": "Systran/faster-whisper-large-v1",
22 | "large-v2": "Systran/faster-whisper-large-v2",
23 | "large-v3": "Systran/faster-whisper-large-v3",
24 | "large": "Systran/faster-whisper-large-v3",
25 | "distil-large-v2": "Systran/faster-distil-whisper-large-v2",
26 | "distil-medium.en": "Systran/faster-distil-whisper-medium.en",
27 | "distil-small.en": "Systran/faster-distil-whisper-small.en",
28 | "distil-large-v3": "Systran/faster-distil-whisper-large-v3",
29 | "distil-large-v3.5": "distil-whisper/distil-large-v3.5-ct2",
30 | "large-v3-turbo": "mobiuslabsgmbh/faster-whisper-large-v3-turbo",
31 | "turbo": "mobiuslabsgmbh/faster-whisper-large-v3-turbo",
32 | }
33 |
34 |
35 | def available_models() -> List[str]:
36 | """Returns the names of available models."""
37 | return list(_MODELS.keys())
38 |
39 |
40 | def get_assets_path():
41 | """Returns the path to the assets directory."""
42 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")
43 |
44 |
45 | def get_logger():
46 | """Returns the module logger."""
47 | return logging.getLogger("faster_whisper")
48 |
49 |
50 | def download_model(
51 | size_or_id: str,
52 | output_dir: Optional[str] = None,
53 | local_files_only: bool = False,
54 | cache_dir: Optional[str] = None,
55 | revision: Optional[str] = None,
56 | use_auth_token: Optional[Union[str, bool]] = None,
57 | ):
58 | """Downloads a CTranslate2 Whisper model from the Hugging Face Hub.
59 |
60 | Args:
61 | size_or_id: Size of the model to download from https://huggingface.co/Systran
 62 |           (tiny, tiny.en, base, base.en, small, small.en, distil-small.en, medium, medium.en,
 63 |           distil-medium.en, large-v1, large-v2, large-v3, large, distil-large-v2, distil-large-v3,
 64 |           distil-large-v3.5, large-v3-turbo, turbo), or a CTranslate2-converted model ID from the
 65 |           Hugging Face Hub (e.g. Systran/faster-whisper-large-v3).
66 | output_dir: Directory where the model should be saved. If not set, the model is saved in
67 | the cache directory.
68 | local_files_only: If True, avoid downloading the file and return the path to the local
69 | cached file if it exists.
70 | cache_dir: Path to the folder where cached files are stored.
71 | revision: An optional Git revision id which can be a branch name, a tag, or a
72 | commit hash.
73 | use_auth_token: HuggingFace authentication token or True to use the
74 | token stored by the HuggingFace config folder.
75 |
76 | Returns:
77 | The path to the downloaded model.
78 |
79 | Raises:
80 | ValueError: if the model size is invalid.
81 | """
82 | if re.match(r".*/.*", size_or_id):
83 | repo_id = size_or_id
84 | else:
85 | repo_id = _MODELS.get(size_or_id)
86 | if repo_id is None:
87 | raise ValueError(
88 | "Invalid model size '%s', expected one of: %s"
89 | % (size_or_id, ", ".join(_MODELS.keys()))
90 | )
91 |
92 | allow_patterns = [
93 | "config.json",
94 | "preprocessor_config.json",
95 | "model.bin",
96 | "tokenizer.json",
97 | "vocabulary.*",
98 | ]
99 |
100 | kwargs = {
101 | "local_files_only": local_files_only,
102 | "allow_patterns": allow_patterns,
103 | "tqdm_class": disabled_tqdm,
104 | "revision": revision,
105 | }
106 |
107 | if output_dir is not None:
108 | kwargs["local_dir"] = output_dir
109 | kwargs["local_dir_use_symlinks"] = False
110 |
111 | if cache_dir is not None:
112 | kwargs["cache_dir"] = cache_dir
113 |
114 | if use_auth_token is not None:
115 | kwargs["token"] = use_auth_token
116 |
117 | try:
118 | return huggingface_hub.snapshot_download(repo_id, **kwargs)
119 | except (
120 | huggingface_hub.utils.HfHubHTTPError,
121 | requests.exceptions.ConnectionError,
122 | ) as exception:
123 | logger = get_logger()
124 | logger.warning(
125 |             "An error occurred while synchronizing the model %s from the Hugging Face Hub:\n%s",
126 | repo_id,
127 | exception,
128 | )
129 | logger.warning(
130 | "Trying to load the model directly from the local cache, if it exists."
131 | )
132 |
133 | kwargs["local_files_only"] = True
134 | return huggingface_hub.snapshot_download(repo_id, **kwargs)
135 |
136 |
137 | def format_timestamp(
138 | seconds: float,
139 | always_include_hours: bool = False,
140 | decimal_marker: str = ".",
141 | ) -> str:
142 | assert seconds >= 0, "non-negative timestamp expected"
143 | milliseconds = round(seconds * 1000.0)
144 |
145 | hours = milliseconds // 3_600_000
146 | milliseconds -= hours * 3_600_000
147 |
148 | minutes = milliseconds // 60_000
149 | milliseconds -= minutes * 60_000
150 |
151 | seconds = milliseconds // 1_000
152 | milliseconds -= seconds * 1_000
153 |
154 | hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
155 | return (
156 | f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
157 | )
158 |
159 |
160 | class disabled_tqdm(tqdm):
161 | def __init__(self, *args, **kwargs):
162 | kwargs["disable"] = True
163 | super().__init__(*args, **kwargs)
164 |
165 |
166 | def get_end(segments: List[dict]) -> Optional[float]:
167 | return next(
168 | (w["end"] for s in reversed(segments) for w in reversed(s["words"])),
169 | segments[-1]["end"] if segments else None,
170 | )
171 |
--------------------------------------------------------------------------------
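Everything in this module is glue around the Hugging Face Hub: _MODELS maps the friendly size names to converted CTranslate2 repositories, download_model resolves a size or repo ID to a local directory (falling back to the local cache when the Hub is unreachable), and format_timestamp renders seconds as [HH:]MM:SS.mmm. A short sketch, assuming network access or a warm cache; the output directory name below is illustrative:

    from faster_whisper.utils import available_models, download_model, format_timestamp

    print(available_models())                    # ["tiny.en", "tiny", "base.en", ...]

    # "whisper-tiny-ct2" is just an example destination directory
    model_dir = download_model("tiny", output_dir="whisper-tiny-ct2")
    print(model_dir)                             # the directory passed as output_dir

    print(format_timestamp(3661.5, always_include_hours=True))   # "01:01:01.500"
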
/faster_whisper/vad.py:
--------------------------------------------------------------------------------
1 | import bisect
2 | import functools
3 | import os
4 |
5 | from dataclasses import dataclass
6 | from typing import Dict, List, Optional, Tuple
7 |
8 | import numpy as np
9 |
10 | from faster_whisper.utils import get_assets_path
11 |
12 |
13 | # The code below is adapted from https://github.com/snakers4/silero-vad.
14 | @dataclass
15 | class VadOptions:
16 | """VAD options.
17 |
18 | Attributes:
19 | threshold: Speech threshold. Silero VAD outputs speech probabilities for each audio chunk,
20 | probabilities ABOVE this value are considered as SPEECH. It is better to tune this
21 | parameter for each dataset separately, but "lazy" 0.5 is pretty good for most datasets.
22 | neg_threshold: Silence threshold for determining the end of speech. If a probability is lower
23 | than neg_threshold, it is always considered silence. Values higher than neg_threshold
24 | are only considered speech if the previous sample was classified as speech; otherwise,
25 | they are treated as silence. This parameter helps refine the detection of speech
26 | transitions, ensuring smoother segment boundaries.
27 |         min_speech_duration_ms: Final speech chunks shorter than this value are thrown out.
28 | max_speech_duration_s: Maximum duration of speech chunks in seconds. Chunks longer
29 | than max_speech_duration_s will be split at the timestamp of the last silence that
30 | lasts more than 100ms (if any), to prevent aggressive cutting. Otherwise, they will be
31 | split aggressively just before max_speech_duration_s.
32 |         min_silence_duration_ms: At the end of each speech chunk, wait for min_silence_duration_ms
33 |           before separating it.
34 |         speech_pad_ms: Final speech chunks are padded by speech_pad_ms on each side.
35 | """
36 |
37 |     threshold: float = 0.5
38 |     neg_threshold: Optional[float] = None
39 |     min_speech_duration_ms: int = 0
40 |     max_speech_duration_s: float = float("inf")
41 |     min_silence_duration_ms: int = 2000
42 |     speech_pad_ms: int = 400
43 |
44 |
45 | def get_speech_timestamps(
46 | audio: np.ndarray,
47 | vad_options: Optional[VadOptions] = None,
48 | sampling_rate: int = 16000,
49 | **kwargs,
50 | ) -> List[dict]:
51 |     """This method is used for splitting long audio into speech chunks using Silero VAD.
52 |
53 | Args:
54 | audio: One dimensional float array.
55 | vad_options: Options for VAD processing.
56 |         sampling_rate: Sampling rate of the audio.
57 | kwargs: VAD options passed as keyword arguments for backward compatibility.
58 |
59 | Returns:
60 | List of dicts containing begin and end samples of each speech chunk.
61 | """
62 | if vad_options is None:
63 | vad_options = VadOptions(**kwargs)
64 |
65 | threshold = vad_options.threshold
66 | neg_threshold = vad_options.neg_threshold
67 | min_speech_duration_ms = vad_options.min_speech_duration_ms
68 | max_speech_duration_s = vad_options.max_speech_duration_s
69 | min_silence_duration_ms = vad_options.min_silence_duration_ms
70 | window_size_samples = 512
71 | speech_pad_ms = vad_options.speech_pad_ms
72 | min_speech_samples = sampling_rate * min_speech_duration_ms / 1000
73 | speech_pad_samples = sampling_rate * speech_pad_ms / 1000
74 | max_speech_samples = (
75 | sampling_rate * max_speech_duration_s
76 | - window_size_samples
77 | - 2 * speech_pad_samples
78 | )
79 | min_silence_samples = sampling_rate * min_silence_duration_ms / 1000
80 | min_silence_samples_at_max_speech = sampling_rate * 98 / 1000
81 |
82 | audio_length_samples = len(audio)
83 |
84 | model = get_vad_model()
85 |
86 | padded_audio = np.pad(
87 | audio, (0, window_size_samples - audio.shape[0] % window_size_samples)
88 | )
89 | speech_probs = model(padded_audio.reshape(1, -1)).squeeze(0)
90 |
91 | triggered = False
92 | speeches = []
93 | current_speech = {}
94 | if neg_threshold is None:
95 | neg_threshold = max(threshold - 0.15, 0.01)
96 |
97 | # to save potential segment end (and tolerate some silence)
98 | temp_end = 0
99 | # to save potential segment limits in case of maximum segment size reached
100 | prev_end = next_start = 0
101 |
102 | for i, speech_prob in enumerate(speech_probs):
103 | if (speech_prob >= threshold) and temp_end:
104 | temp_end = 0
105 | if next_start < prev_end:
106 | next_start = window_size_samples * i
107 |
108 | if (speech_prob >= threshold) and not triggered:
109 | triggered = True
110 | current_speech["start"] = window_size_samples * i
111 | continue
112 |
113 | if (
114 | triggered
115 | and (window_size_samples * i) - current_speech["start"] > max_speech_samples
116 | ):
117 | if prev_end:
118 | current_speech["end"] = prev_end
119 | speeches.append(current_speech)
120 | current_speech = {}
121 | # previously reached silence (< neg_thres) and is still not speech (< thres)
122 | if next_start < prev_end:
123 | triggered = False
124 | else:
125 | current_speech["start"] = next_start
126 | prev_end = next_start = temp_end = 0
127 | else:
128 | current_speech["end"] = window_size_samples * i
129 | speeches.append(current_speech)
130 | current_speech = {}
131 | prev_end = next_start = temp_end = 0
132 | triggered = False
133 | continue
134 |
135 | if (speech_prob < neg_threshold) and triggered:
136 | if not temp_end:
137 | temp_end = window_size_samples * i
138 | # condition to avoid cutting in very short silence
139 | if (window_size_samples * i) - temp_end > min_silence_samples_at_max_speech:
140 | prev_end = temp_end
141 | if (window_size_samples * i) - temp_end < min_silence_samples:
142 | continue
143 | else:
144 | current_speech["end"] = temp_end
145 | if (
146 | current_speech["end"] - current_speech["start"]
147 | ) > min_speech_samples:
148 | speeches.append(current_speech)
149 | current_speech = {}
150 | prev_end = next_start = temp_end = 0
151 | triggered = False
152 | continue
153 |
154 | if (
155 | current_speech
156 | and (audio_length_samples - current_speech["start"]) > min_speech_samples
157 | ):
158 | current_speech["end"] = audio_length_samples
159 | speeches.append(current_speech)
160 |
161 | for i, speech in enumerate(speeches):
162 | if i == 0:
163 | speech["start"] = int(max(0, speech["start"] - speech_pad_samples))
164 | if i != len(speeches) - 1:
165 | silence_duration = speeches[i + 1]["start"] - speech["end"]
166 | if silence_duration < 2 * speech_pad_samples:
167 | speech["end"] += int(silence_duration // 2)
168 | speeches[i + 1]["start"] = int(
169 | max(0, speeches[i + 1]["start"] - silence_duration // 2)
170 | )
171 | else:
172 | speech["end"] = int(
173 | min(audio_length_samples, speech["end"] + speech_pad_samples)
174 | )
175 | speeches[i + 1]["start"] = int(
176 | max(0, speeches[i + 1]["start"] - speech_pad_samples)
177 | )
178 | else:
179 | speech["end"] = int(
180 | min(audio_length_samples, speech["end"] + speech_pad_samples)
181 | )
182 |
183 | return speeches
184 |
185 |
186 | def collect_chunks(
187 | audio: np.ndarray, chunks: List[dict], sampling_rate: int = 16000
188 | ) -> Tuple[List[np.ndarray], List[Dict[str, int]]]:
189 | """Collects audio chunks."""
190 | if not chunks:
191 | chunk_metadata = {
192 | "start_time": 0,
193 | "end_time": 0,
194 | }
195 | return [np.array([], dtype=np.float32)], [chunk_metadata]
196 |
197 | audio_chunks = []
198 | chunks_metadata = []
199 | for chunk in chunks:
200 | chunk_metadata = {
201 | "start_time": chunk["start"] / sampling_rate,
202 | "end_time": chunk["end"] / sampling_rate,
203 | }
204 | audio_chunks.append(audio[chunk["start"] : chunk["end"]])
205 | chunks_metadata.append(chunk_metadata)
206 | return audio_chunks, chunks_metadata
207 |
208 |
209 | class SpeechTimestampsMap:
210 | """Helper class to restore original speech timestamps."""
211 |
212 | def __init__(self, chunks: List[dict], sampling_rate: int, time_precision: int = 2):
213 | self.sampling_rate = sampling_rate
214 | self.time_precision = time_precision
215 | self.chunk_end_sample = []
216 | self.total_silence_before = []
217 |
218 | previous_end = 0
219 | silent_samples = 0
220 |
221 | for chunk in chunks:
222 | silent_samples += chunk["start"] - previous_end
223 | previous_end = chunk["end"]
224 |
225 | self.chunk_end_sample.append(chunk["end"] - silent_samples)
226 | self.total_silence_before.append(silent_samples / sampling_rate)
227 |
228 | def get_original_time(
229 | self,
230 | time: float,
231 | chunk_index: Optional[int] = None,
232 | ) -> float:
233 | if chunk_index is None:
234 | chunk_index = self.get_chunk_index(time)
235 |
236 | total_silence_before = self.total_silence_before[chunk_index]
237 | return round(total_silence_before + time, self.time_precision)
238 |
239 | def get_chunk_index(self, time: float) -> int:
240 | sample = int(time * self.sampling_rate)
241 | return min(
242 | bisect.bisect(self.chunk_end_sample, sample),
243 | len(self.chunk_end_sample) - 1,
244 | )
245 |
246 |
247 | @functools.lru_cache
248 | def get_vad_model():
249 | """Returns the VAD model instance."""
250 | encoder_path = os.path.join(get_assets_path(), "silero_encoder_v5.onnx")
251 | decoder_path = os.path.join(get_assets_path(), "silero_decoder_v5.onnx")
252 | return SileroVADModel(encoder_path, decoder_path)
253 |
254 |
255 | class SileroVADModel:
256 | def __init__(self, encoder_path, decoder_path):
257 | try:
258 | import onnxruntime
259 | except ImportError as e:
260 | raise RuntimeError(
261 | "Applying the VAD filter requires the onnxruntime package"
262 | ) from e
263 |
264 | opts = onnxruntime.SessionOptions()
265 | opts.inter_op_num_threads = 1
266 | opts.intra_op_num_threads = 1
267 | opts.enable_cpu_mem_arena = False
268 | opts.log_severity_level = 4
269 |
270 | self.encoder_session = onnxruntime.InferenceSession(
271 | encoder_path,
272 | providers=["CPUExecutionProvider"],
273 | sess_options=opts,
274 | )
275 | self.decoder_session = onnxruntime.InferenceSession(
276 | decoder_path,
277 | providers=["CPUExecutionProvider"],
278 | sess_options=opts,
279 | )
280 |
281 | def __call__(
282 | self, audio: np.ndarray, num_samples: int = 512, context_size_samples: int = 64
283 | ):
284 | assert (
285 | audio.ndim == 2
286 | ), "Input should be a 2D array with size (batch_size, num_samples)"
287 | assert (
288 | audio.shape[1] % num_samples == 0
289 | ), "Input size should be a multiple of num_samples"
290 |
291 | batch_size = audio.shape[0]
292 |
293 | state = np.zeros((2, batch_size, 128), dtype="float32")
294 | context = np.zeros(
295 | (batch_size, context_size_samples),
296 | dtype="float32",
297 | )
298 |
299 | batched_audio = audio.reshape(batch_size, -1, num_samples)
300 | context = batched_audio[..., -context_size_samples:]
301 | context[:, -1] = 0
302 | context = np.roll(context, 1, 1)
303 | batched_audio = np.concatenate([context, batched_audio], 2)
304 |
305 | batched_audio = batched_audio.reshape(-1, num_samples + context_size_samples)
306 |
307 | encoder_batch_size = 10000
308 | num_segments = batched_audio.shape[0]
309 | encoder_outputs = []
310 | for i in range(0, num_segments, encoder_batch_size):
311 | encoder_output = self.encoder_session.run(
312 | None, {"input": batched_audio[i : i + encoder_batch_size]}
313 | )[0]
314 | encoder_outputs.append(encoder_output)
315 |
316 | encoder_output = np.concatenate(encoder_outputs, axis=0)
317 | encoder_output = encoder_output.reshape(batch_size, -1, 128)
318 |
319 | decoder_outputs = []
320 | for window in np.split(encoder_output, encoder_output.shape[1], axis=1):
321 | out, state = self.decoder_session.run(
322 | None, {"input": window.squeeze(1), "state": state}
323 | )
324 | decoder_outputs.append(out)
325 |
326 | out = np.stack(decoder_outputs, axis=1).squeeze(-1)
327 | return out
328 |
329 |
330 | def merge_segments(segments_list, vad_options: VadOptions, sampling_rate: int = 16000):
331 | if not segments_list:
332 | return []
333 |
334 | curr_end = 0
335 | seg_idxs = []
336 | merged_segments = []
337 | edge_padding = vad_options.speech_pad_ms * sampling_rate // 1000
338 | chunk_length = vad_options.max_speech_duration_s * sampling_rate
339 |
340 | curr_start = segments_list[0]["start"]
341 |
342 | for idx, seg in enumerate(segments_list):
343 | # if any segment start timing is less than previous segment end timing,
344 | # reset the edge padding. Similarly for end timing.
345 | if idx > 0:
346 | if seg["start"] < segments_list[idx - 1]["end"]:
347 | seg["start"] += edge_padding
348 | if idx < len(segments_list) - 1:
349 | if seg["end"] > segments_list[idx + 1]["start"]:
350 | seg["end"] -= edge_padding
351 |
352 | if seg["end"] - curr_start > chunk_length and curr_end - curr_start > 0:
353 | merged_segments.append(
354 | {
355 | "start": curr_start,
356 | "end": curr_end,
357 | "segments": seg_idxs,
358 | }
359 | )
360 | curr_start = seg["start"]
361 | seg_idxs = []
362 | curr_end = seg["end"]
363 | seg_idxs.append((seg["start"], seg["end"]))
364 | # add final
365 | merged_segments.append(
366 | {
367 | "start": curr_start,
368 | "end": curr_end,
369 | "segments": seg_idxs,
370 | }
371 | )
372 | return merged_segments
373 |
--------------------------------------------------------------------------------
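In short, get_speech_timestamps returns sample-indexed speech chunks, collect_chunks cuts those chunks out of the waveform (also returning per-chunk start/end times in seconds), and SpeechTimestampsMap converts times measured inside the concatenated speech back to positions in the original recording. A brief sketch of that flow, assuming the optional onnxruntime dependency is installed and using the repository's jfk.flac test file as input:

    from faster_whisper.audio import decode_audio
    from faster_whisper.vad import (
        SpeechTimestampsMap,
        VadOptions,
        collect_chunks,
        get_speech_timestamps,
    )

    # assumes onnxruntime is installed and the repo's test file is available locally
    audio = decode_audio("tests/data/jfk.flac", sampling_rate=16000)
    options = VadOptions(min_silence_duration_ms=500, speech_pad_ms=200)

    chunks = get_speech_timestamps(audio, options)        # [{"start": sample, "end": sample}, ...]
    speech_chunks, metadata = collect_chunks(audio, chunks)

    ts_map = SpeechTimestampsMap(chunks, sampling_rate=16000)
    print(ts_map.get_original_time(1.0))                  # 1.0 s into the speech-only audio

Leaving neg_threshold unset in the sketch is deliberate: as in get_speech_timestamps above, it defaults to max(threshold - 0.15, 0.01).
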
/faster_whisper/version.py:
--------------------------------------------------------------------------------
1 | """Version information."""
2 |
3 | __version__ = "1.1.1"
4 |
--------------------------------------------------------------------------------
/requirements.conversion.txt:
--------------------------------------------------------------------------------
1 | transformers[torch]>=4.23
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ctranslate2>=4.0,<5
2 | huggingface_hub>=0.13
3 | tokenizers>=0.13,<1
4 | onnxruntime>=1.14,<2
5 | av>=11
6 | tqdm
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 100
3 | ignore =
4 | E203,
5 | W503,
6 |
7 | [isort]
8 | profile=black
9 | lines_between_types=1
10 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from setuptools import find_packages, setup
4 |
5 | base_dir = os.path.dirname(os.path.abspath(__file__))
6 |
7 |
8 | def get_long_description():
9 | readme_path = os.path.join(base_dir, "README.md")
10 | with open(readme_path, encoding="utf-8") as readme_file:
11 | return readme_file.read()
12 |
13 |
14 | def get_project_version():
15 | version_path = os.path.join(base_dir, "faster_whisper", "version.py")
16 | version = {}
17 | with open(version_path, encoding="utf-8") as fp:
18 | exec(fp.read(), version)
19 | return version["__version__"]
20 |
21 |
22 | def get_requirements(path):
23 | with open(path, encoding="utf-8") as requirements:
24 | return [requirement.strip() for requirement in requirements]
25 |
26 |
27 | install_requires = get_requirements(os.path.join(base_dir, "requirements.txt"))
28 | conversion_requires = get_requirements(
29 | os.path.join(base_dir, "requirements.conversion.txt")
30 | )
31 |
32 | setup(
33 | name="faster-whisper",
34 | version=get_project_version(),
35 | license="MIT",
36 | description="Faster Whisper transcription with CTranslate2",
37 | long_description=get_long_description(),
38 | long_description_content_type="text/markdown",
39 | author="Guillaume Klein",
40 | url="https://github.com/SYSTRAN/faster-whisper",
41 | classifiers=[
42 | "Development Status :: 4 - Beta",
43 | "Intended Audience :: Developers",
44 | "Intended Audience :: Science/Research",
45 | "License :: OSI Approved :: MIT License",
46 | "Programming Language :: Python :: 3",
47 | "Programming Language :: Python :: 3 :: Only",
48 | "Programming Language :: Python :: 3.9",
49 | "Programming Language :: Python :: 3.10",
50 | "Programming Language :: Python :: 3.11",
51 | "Topic :: Scientific/Engineering :: Artificial Intelligence",
52 | ],
53 | keywords="openai whisper speech ctranslate2 inference quantization transformer",
54 | python_requires=">=3.9",
55 | install_requires=install_requires,
56 | extras_require={
57 | "conversion": conversion_requires,
58 | "dev": [
59 | "black==23.*",
60 | "flake8==6.*",
61 | "isort==5.*",
62 | "pytest==7.*",
63 | ],
64 | },
65 | packages=find_packages(),
66 | include_package_data=True,
67 | )
68 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 |
5 |
6 | @pytest.fixture
7 | def data_dir():
8 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
9 |
10 |
11 | @pytest.fixture
12 | def jfk_path(data_dir):
13 | return os.path.join(data_dir, "jfk.flac")
14 |
15 |
16 | @pytest.fixture
17 | def physcisworks_path(data_dir):
18 | return os.path.join(data_dir, "physicsworks.wav")
19 |
--------------------------------------------------------------------------------
/tests/data/hotwords.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/hotwords.mp3
--------------------------------------------------------------------------------
/tests/data/jfk.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/jfk.flac
--------------------------------------------------------------------------------
/tests/data/multilingual.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/multilingual.mp3
--------------------------------------------------------------------------------
/tests/data/physicsworks.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/physicsworks.wav
--------------------------------------------------------------------------------
/tests/data/stereo_diarization.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SYSTRAN/faster-whisper/d3bfd0a305eb9d97c08047c82149c1998cc90fcb/tests/data/stereo_diarization.wav
--------------------------------------------------------------------------------
/tests/test_tokenizer.py:
--------------------------------------------------------------------------------
1 | from faster_whisper import WhisperModel
2 | from faster_whisper.tokenizer import Tokenizer
3 | from faster_whisper.transcribe import get_suppressed_tokens
4 |
5 |
6 | def test_suppressed_tokens_minus_1():
7 | model = WhisperModel("tiny.en")
8 |
9 | tokenizer = Tokenizer(model.hf_tokenizer, False)
10 | tokens = get_suppressed_tokens(tokenizer, [-1])
11 | assert tokens == (
12 | 1,
13 | 2,
14 | 7,
15 | 8,
16 | 9,
17 | 10,
18 | 14,
19 | 25,
20 | 26,
21 | 27,
22 | 28,
23 | 29,
24 | 31,
25 | 58,
26 | 59,
27 | 60,
28 | 61,
29 | 62,
30 | 63,
31 | 90,
32 | 91,
33 | 92,
34 | 93,
35 | 357,
36 | 366,
37 | 438,
38 | 532,
39 | 685,
40 | 705,
41 | 796,
42 | 930,
43 | 1058,
44 | 1220,
45 | 1267,
46 | 1279,
47 | 1303,
48 | 1343,
49 | 1377,
50 | 1391,
51 | 1635,
52 | 1782,
53 | 1875,
54 | 2162,
55 | 2361,
56 | 2488,
57 | 3467,
58 | 4008,
59 | 4211,
60 | 4600,
61 | 4808,
62 | 5299,
63 | 5855,
64 | 6329,
65 | 7203,
66 | 9609,
67 | 9959,
68 | 10563,
69 | 10786,
70 | 11420,
71 | 11709,
72 | 11907,
73 | 13163,
74 | 13697,
75 | 13700,
76 | 14808,
77 | 15306,
78 | 16410,
79 | 16791,
80 | 17992,
81 | 19203,
82 | 19510,
83 | 20724,
84 | 22305,
85 | 22935,
86 | 27007,
87 | 30109,
88 | 30420,
89 | 33409,
90 | 34949,
91 | 40283,
92 | 40493,
93 | 40549,
94 | 47282,
95 | 49146,
96 | 50257,
97 | 50357,
98 | 50358,
99 | 50359,
100 | 50360,
101 | )
102 |
103 |
104 | def test_suppressed_tokens_minus_value():
105 | model = WhisperModel("tiny.en")
106 |
107 | tokenizer = Tokenizer(model.hf_tokenizer, False)
108 | tokens = get_suppressed_tokens(tokenizer, [13])
109 | assert tokens == (13, 50257, 50357, 50358, 50359, 50360)
110 |
111 |
112 | def test_split_on_unicode():
113 | model = WhisperModel("tiny")
114 | tokenizer = Tokenizer(model.hf_tokenizer, False)
115 |
116 | tokens = [8404, 871, 287, 6, 246, 526, 3210, 20378]
117 | words, word_tokens = tokenizer.split_tokens_on_unicode(tokens)
118 |
119 | assert words == [" elle", " est", " l", "'", "\ufffd", "é", "rit", "oire"]
120 | assert word_tokens == [[8404], [871], [287], [6], [246], [526], [3210], [20378]]
121 |
--------------------------------------------------------------------------------
/tests/test_transcribe.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import os
3 |
4 | import numpy as np
5 |
6 | from faster_whisper import BatchedInferencePipeline, WhisperModel, decode_audio
7 |
8 |
9 | def test_supported_languages():
10 | model = WhisperModel("tiny.en")
11 | assert model.supported_languages == ["en"]
12 |
13 |
14 | def test_transcribe(jfk_path):
15 | model = WhisperModel("tiny")
16 | segments, info = model.transcribe(jfk_path, word_timestamps=True)
17 | assert info.all_language_probs is not None
18 |
19 | assert info.language == "en"
20 | assert info.language_probability > 0.9
21 | assert info.duration == 11
22 |
23 | # Get top language info from all results, which should match the
24 | # already existing metadata
25 | top_lang, top_lang_score = info.all_language_probs[0]
26 | assert info.language == top_lang
27 | assert abs(info.language_probability - top_lang_score) < 1e-16
28 |
29 | segments = list(segments)
30 |
31 | assert len(segments) == 1
32 |
33 | segment = segments[0]
34 |
35 | assert segment.text == (
36 | " And so my fellow Americans, ask not what your country can do for you, "
37 | "ask what you can do for your country."
38 | )
39 |
40 | assert segment.text == "".join(word.word for word in segment.words)
41 | assert segment.start == segment.words[0].start
42 | assert segment.end == segment.words[-1].end
43 | batched_model = BatchedInferencePipeline(model=model)
44 | result, info = batched_model.transcribe(
45 | jfk_path, word_timestamps=True, vad_filter=False
46 | )
47 | assert info.language == "en"
48 | assert info.language_probability > 0.7
49 | segments = []
50 | for segment in result:
51 | segments.append(
52 | {"start": segment.start, "end": segment.end, "text": segment.text}
53 | )
54 |
55 | assert len(segments) == 1
56 | assert segment.text == (
57 | " And so my fellow Americans ask not what your country can do for you, "
58 | "ask what you can do for your country."
59 | )
60 |
61 |
62 | def test_batched_transcribe(physcisworks_path):
63 | model = WhisperModel("tiny")
64 | batched_model = BatchedInferencePipeline(model=model)
65 | result, info = batched_model.transcribe(physcisworks_path, batch_size=16)
66 | assert info.language == "en"
67 | assert info.language_probability > 0.7
68 | segments = []
69 | for segment in result:
70 | segments.append(
71 | {"start": segment.start, "end": segment.end, "text": segment.text}
72 | )
73 | # number of near 30 sec segments
74 | assert len(segments) == 7
75 |
76 | result, info = batched_model.transcribe(
77 | physcisworks_path,
78 | batch_size=16,
79 | without_timestamps=False,
80 | word_timestamps=True,
81 | )
82 | segments = []
83 | for segment in result:
84 | assert segment.words is not None
85 | segments.append(
86 | {"start": segment.start, "end": segment.end, "text": segment.text}
87 | )
88 | assert len(segments) > 7
89 |
90 |
91 | def test_empty_audio():
92 | audio = np.asarray([], dtype="float32")
93 | model = WhisperModel("tiny")
94 | pipeline = BatchedInferencePipeline(model=model)
95 | assert list(model.transcribe(audio)[0]) == []
96 | assert list(pipeline.transcribe(audio)[0]) == []
97 | model.detect_language(audio)
98 |
99 |
100 | def test_prefix_with_timestamps(jfk_path):
101 | model = WhisperModel("tiny")
102 | segments, _ = model.transcribe(jfk_path, prefix="And so my fellow Americans")
103 | segments = list(segments)
104 |
105 | assert len(segments) == 1
106 |
107 | segment = segments[0]
108 |
109 | assert segment.text == (
110 | " And so my fellow Americans, ask not what your country can do for you, "
111 | "ask what you can do for your country."
112 | )
113 |
114 | assert segment.start == 0
115 | assert 10 < segment.end <= 11
116 |
117 |
118 | def test_vad(jfk_path):
119 | model = WhisperModel("tiny")
120 | segments, info = model.transcribe(
121 | jfk_path,
122 | vad_filter=True,
123 | vad_parameters=dict(min_silence_duration_ms=500, speech_pad_ms=200),
124 | )
125 | segments = list(segments)
126 |
127 | assert len(segments) == 1
128 | segment = segments[0]
129 |
130 | assert segment.text == (
131 | " And so my fellow Americans ask not what your country can do for you, "
132 | "ask what you can do for your country."
133 | )
134 |
135 | assert 0 < segment.start < 1
136 | assert 10 < segment.end < 11
137 |
138 | assert info.vad_options.min_silence_duration_ms == 500
139 | assert info.vad_options.speech_pad_ms == 200
140 |
141 |
142 | def test_stereo_diarization(data_dir):
143 | model = WhisperModel("tiny")
144 |
145 | audio_path = os.path.join(data_dir, "stereo_diarization.wav")
146 | left, right = decode_audio(audio_path, split_stereo=True)
147 |
148 | segments, _ = model.transcribe(left)
149 | transcription = "".join(segment.text for segment in segments).strip()
150 | assert transcription == (
151 | "He began a confused complaint against the wizard, "
152 | "who had vanished behind the curtain on the left."
153 | )
154 |
155 | segments, _ = model.transcribe(right)
156 | transcription = "".join(segment.text for segment in segments).strip()
157 | assert transcription == "The horizon seems extremely distant."
158 |
159 |
160 | def test_multilingual_transcription(data_dir):
161 | model = WhisperModel("tiny")
162 | pipeline = BatchedInferencePipeline(model)
163 |
164 | audio_path = os.path.join(data_dir, "multilingual.mp3")
165 | audio = decode_audio(audio_path)
166 |
167 | segments, info = model.transcribe(
168 | audio,
169 | multilingual=True,
170 | without_timestamps=True,
171 | condition_on_previous_text=False,
172 | )
173 | segments = list(segments)
174 |
175 | assert (
176 | segments[0].text
177 | == " Permission is hereby granted, free of charge, to any person obtaining a copy of the"
178 | " software and associated documentation files to deal in the software without restriction,"
179 | " including without limitation the rights to use, copy, modify, merge, publish, distribute"
180 | ", sublicence, and or cell copies of the software, and to permit persons to whom the "
181 | "software is furnished to do so, subject to the following conditions. The above copyright"
182 | " notice and this permission notice, shall be included in all copies or substantial "
183 | "portions of the software."
184 | )
185 |
186 | assert (
187 | segments[1].text
188 | == " Jedem, der dieses Software und die dazu gehöregen Dokumentationsdatein erhält, wird "
189 | "hiermit unengeltlich die Genehmigung erteilt, wird der Software und eingeschränkt zu "
190 | "verfahren. Dies umfasst insbesondere das Recht, die Software zu verwenden, zu "
191 | "vervielfältigen, zu modifizieren, zu Samenzofügen, zu veröffentlichen, zu verteilen, "
192 | "unterzulizenzieren und oder kopieren der Software zu verkaufen und diese Rechte "
193 | "unterfolgen den Bedingungen anderen zu übertragen."
194 | )
195 |
196 | segments, info = pipeline.transcribe(audio, multilingual=True)
197 | segments = list(segments)
198 |
199 | assert (
200 | segments[0].text
201 | == " Permission is hereby granted, free of charge, to any person obtaining a copy of the"
202 | " software and associated documentation files to deal in the software without restriction,"
203 | " including without limitation the rights to use, copy, modify, merge, publish, distribute"
204 | ", sublicence, and or cell copies of the software, and to permit persons to whom the "
205 | "software is furnished to do so, subject to the following conditions. The above copyright"
206 | " notice and this permission notice, shall be included in all copies or substantial "
207 | "portions of the software."
208 | )
209 | assert (
210 | "Dokumentationsdatein erhält, wird hiermit unengeltlich die Genehmigung erteilt,"
211 | " wird der Software und eingeschränkt zu verfahren. Dies umfasst insbesondere das Recht,"
212 | " die Software zu verwenden, zu vervielfältigen, zu modifizieren"
213 | in segments[1].text
214 | )
215 |
216 |
217 | def test_hotwords(data_dir):
218 | model = WhisperModel("tiny")
219 | pipeline = BatchedInferencePipeline(model)
220 |
221 | audio_path = os.path.join(data_dir, "hotwords.mp3")
222 | audio = decode_audio(audio_path)
223 |
224 | segments, info = model.transcribe(audio, hotwords="ComfyUI")
225 | segments = list(segments)
226 |
227 | assert "ComfyUI" in segments[0].text
228 | assert info.transcription_options.hotwords == "ComfyUI"
229 |
230 | segments, info = pipeline.transcribe(audio, hotwords="ComfyUI")
231 | segments = list(segments)
232 |
233 | assert "ComfyUI" in segments[0].text
234 | assert info.transcription_options.hotwords == "ComfyUI"
235 |
236 |
237 | def test_transcribe_signature():
238 | model_transcribe_args = set(inspect.getargs(WhisperModel.transcribe.__code__).args)
239 | pipeline_transcribe_args = set(
240 | inspect.getargs(BatchedInferencePipeline.transcribe.__code__).args
241 | )
242 | pipeline_transcribe_args.remove("batch_size")
243 |
244 | assert model_transcribe_args == pipeline_transcribe_args
245 |
246 |
247 | def test_monotonic_timestamps(physcisworks_path):
248 | model = WhisperModel("tiny")
249 | pipeline = BatchedInferencePipeline(model=model)
250 |
251 | segments, info = model.transcribe(physcisworks_path, word_timestamps=True)
252 | segments = list(segments)
253 |
254 | for i in range(len(segments) - 1):
255 | assert segments[i].start <= segments[i].end
256 | assert segments[i].end <= segments[i + 1].start
257 | for word in segments[i].words:
258 | assert word.start <= word.end
259 | assert word.end <= segments[i].end
260 | assert segments[-1].end <= info.duration
261 |
262 | segments, info = pipeline.transcribe(physcisworks_path, word_timestamps=True)
263 | segments = list(segments)
264 |
265 | for i in range(len(segments) - 1):
266 | assert segments[i].start <= segments[i].end
267 | assert segments[i].end <= segments[i + 1].start
268 | for word in segments[i].words:
269 | assert word.start <= word.end
270 | assert word.end <= segments[i].end
271 | assert segments[-1].end <= info.duration
272 |
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from faster_whisper import available_models, download_model
4 |
5 |
6 | def test_available_models():
7 | models = available_models()
8 | assert isinstance(models, list)
9 | assert "tiny" in models
10 |
11 |
12 | def test_download_model(tmpdir):
13 | output_dir = str(tmpdir.join("model"))
14 |
15 | model_dir = download_model("tiny", output_dir=output_dir)
16 |
17 | assert model_dir == output_dir
18 | assert os.path.isdir(model_dir)
19 | assert not os.path.islink(model_dir)
20 |
21 | for filename in os.listdir(model_dir):
22 | path = os.path.join(model_dir, filename)
23 | assert not os.path.islink(path)
24 |
25 |
26 | def test_download_model_in_cache(tmpdir):
27 | cache_dir = str(tmpdir.join("model"))
28 | download_model("tiny", cache_dir=cache_dir)
29 | assert os.path.isdir(cache_dir)
30 |
--------------------------------------------------------------------------------