├── requirements.txt
├── .gitmodules
├── demo
    ├── utterance
    │   └── vaiueo2d.wav
    └── demo.py
├── MANIFEST.in
├── pyproject.toml
├── pyworld
    ├── __init__.py
    ├── pyworld.pyi
    └── pyworld.pyx
├── LICENSE
├── setup.py
├── .github
    └── workflows
    │   └── build-wheels.yml
├── README.md
└── .gitignore


/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | matplotlib
3 | argparse; python_version<"3.5"
4 | soundfile
5 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "lib/World"]
2 | 	path = lib/World
3 | 	url = https://github.com/mmorise/World
4 | 


--------------------------------------------------------------------------------
/demo/utterance/vaiueo2d.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsukumijima/pyworld-prebuilt/master/demo/utterance/vaiueo2d.wav


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md LICENSE
2 | recursive-include lib *.cpp *.h LICENSE.txt
3 | recursive-include pyworld *.cpp *.pxd *.pyx
4 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |     "setuptools",
4 |     "numpy>=2; python_version>='3.9'",
5 |     "oldest-supported-numpy; python_version<'3.9'",
6 |     "cython>=3.0",
7 | ]
8 | build-backend = "setuptools.build_meta"
9 | 


--------------------------------------------------------------------------------
/pyworld/__init__.py:
--------------------------------------------------------------------------------
 1 | """PyWorld is a Python wrapper for WORLD vocoder.
 2 | 
 3 | PyWorld wrappers WORLD, which is a free software for high-quality speech
 4 | analysis, manipulation and synthesis. It can estimate fundamental frequency (F0),
 5 | aperiodicity and spectral envelope and also generate the speech like input speech
 6 | with only estimated parameters.
 7 | 
 8 | For more information, see https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder
 9 | """
10 | 
11 | from __future__ import division, print_function, absolute_import
12 | import sys
13 | 
14 | if sys.version_info >= (3, 8):
15 |     from importlib.metadata import version
16 | else:
17 |     from importlib_metadata import version
18 | 
19 | __version__ = version('pyworld-prebuilt')
20 | 
21 | from .pyworld import *
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright 2016 pyworld contributors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, print_function, with_statement
 2 | 
 3 | import sys
 4 | from glob import glob
 5 | from os.path import join
 6 | 
 7 | import numpy
 8 | from setuptools import Extension, find_packages, setup
 9 | from setuptools.command.build_ext import build_ext
10 | 
11 | 
12 | _VERSION = '0.3.5-post2'
13 | 
14 | 
15 | world_src_top = join("lib", "World", "src")
16 | world_sources = glob(join(world_src_top, "*.cpp"))
17 | 
18 | ext_modules = [
19 |     Extension(
20 |         name="pyworld.pyworld",
21 |         include_dirs=[world_src_top, numpy.get_include()],
22 |         sources=[join("pyworld", "pyworld.pyx")] + world_sources,
23 |         language="c++")]
24 | 
25 | kwargs = {"encoding": "utf-8"} if int(sys.version[0]) > 2 else {}
26 | setup(
27 |     name="pyworld-prebuilt",
28 |     description="PyWorld: a Python wrapper for WORLD vocoder",
29 |     long_description=open("README.md", "r", **kwargs).read(),
30 |     long_description_content_type="text/markdown",
31 |     ext_modules=ext_modules,
32 |     cmdclass={'build_ext': build_ext},
33 |     version=_VERSION,
34 |     packages=find_packages(),
35 |     install_requires=['numpy'],
36 |     extras_require={
37 |         'test': ['nose'],
38 |         'sdist': ['numpy', 'cython>=3.0'],
39 |     },
40 |     author="Pyworld Contributors",
41 |     author_email="jeremycchsu@gmail.com",
42 |     url="https://github.com/tsukumijima/pyworld-prebuilt",
43 |     keywords=['vocoder'],
44 |     classifiers=[],
45 | )
46 | 


--------------------------------------------------------------------------------
/pyworld/pyworld.pyi:
--------------------------------------------------------------------------------
 1 | from numpy.typing import NDArray
 2 | from typing import Tuple
 3 | 
 4 | 
 5 | def dio(x: NDArray, fs: int, f0_floor: float = ..., f0_ceil: float = ..., channels_in_octave: float = ..., frame_period: float = ..., speed: int = ..., allowed_range: float = ...) -> Tuple[NDArray, NDArray]: ...
 6 | 
 7 | def harvest(x: NDArray, fs: int, f0_floor: float = ..., f0_ceil: float = ..., frame_period: float = ...) -> Tuple[NDArray, NDArray]: ...
 8 | 
 9 | def stonemask(x: NDArray, f0: NDArray, temporal_positions: NDArray, fs: int) -> NDArray: ...
10 | 
11 | def get_cheaptrick_fft_size(fs: int, f0_floor: float = ...) -> int: ...
12 | 
13 | def get_cheaptrick_f0_floor(fs: int, fft_size: int) -> float: ...
14 | 
15 | def cheaptrick(x: NDArray, f0: NDArray, temporal_positions: NDArray, fs: int, q1: float = ..., f0_floor: float = ..., fft_size: int = ...) -> NDArray: ...
16 | 
17 | def d4c(x: NDArray, f0: NDArray, temporal_positions: NDArray, fs: int, threshold: float = ..., fft_size: int = ...) -> NDArray: ...
18 | 
19 | def synthesize(f0: NDArray, spectrogram: NDArray, aperiodicity: NDArray, fs: int, frame_period: float = ...) -> NDArray: ...
20 | 
21 | def get_num_aperiodicities(fs: int) -> int: ...
22 | 
23 | def code_aperiodicity(aperiodicity: NDArray, fs: int) -> NDArray: ...
24 | 
25 | def decode_aperiodicity(coded_aperiodicity: NDArray, fs: int, fft_size: int) -> NDArray: ...
26 | 
27 | def code_spectral_envelope(spectrogram: NDArray, fs: int, number_of_dimensions: int) -> NDArray: ...
28 | 
29 | def decode_spectral_envelope(coded_spectral_envelope: NDArray, fs: int, fft_size: int) -> NDArray: ...
30 | 
31 | def wav2world(x: NDArray, fs: int, fft_size: int = ..., frame_period: float = ...) -> Tuple[NDArray, NDArray, NDArray]: ...
32 | 


--------------------------------------------------------------------------------
/.github/workflows/build-wheels.yml:
--------------------------------------------------------------------------------
  1 | name: Build and Publish Wheels
  2 | 
  3 | on:
  4 |   push:
  5 |   workflow_dispatch:
  6 | 
  7 | permissions:
  8 |   contents: read
  9 |   id-token: write
 10 | 
 11 | jobs:
 12 |   build-wheels:
 13 |     name: Build Wheels on ${{ matrix.os }}
 14 |     strategy:
 15 |       matrix:
 16 |         os: [windows-2022, macos-15, ubuntu-24.04, ubuntu-24.04-arm]
 17 |         include:
 18 |           - os: windows-2022
 19 |             archs: [AMD64, x86, ARM64]
 20 |           - os: macos-15
 21 |             archs: [x86_64, arm64, universal2]
 22 |           - os: ubuntu-24.04
 23 |             archs: [x86_64]
 24 |           - os: ubuntu-24.04-arm
 25 |             archs: [aarch64]
 26 |     runs-on: ${{ matrix.os }}
 27 |     steps:
 28 |       - uses: actions/checkout@v4
 29 |         with:
 30 |           submodules: true
 31 | 
 32 |       - name: Set up Python
 33 |         uses: actions/setup-python@v5
 34 |         with:
 35 |           python-version: '3.x'
 36 |           cache: 'pip'
 37 | 
 38 |       - name: Setup cibuildwheel cache
 39 |         uses: actions/cache@v4
 40 |         with:
 41 |           path: |
 42 |             ~/.cache/pip
 43 |             ~/.cache/cibuildwheel
 44 |             ~/Library/Caches/pip
 45 |             ~/AppData/Local/pip/Cache
 46 |           key: ${{ runner.os }}-cibuildwheel-${{ hashFiles('**/pyproject.toml', '**/setup.py') }}
 47 |           restore-keys: |
 48 |             ${{ runner.os }}-cibuildwheel-
 49 | 
 50 |       - name: Build Wheels
 51 |         uses: pypa/cibuildwheel@main
 52 |         env:
 53 |           CIBW_ARCHS_LINUX: ${{ contains(matrix.os, 'ubuntu') && join(matrix.archs, ' ') || 'x86_64' }}
 54 |           CIBW_ARCHS_WINDOWS: ${{ contains(matrix.os, 'windows') && join(matrix.archs, ' ') || 'AMD64' }}
 55 |           CIBW_ARCHS_MACOS: ${{ (contains(matrix.os, 'macos') && join(matrix.archs, ' ')) || 'x86_64' }}
 56 |           CIBW_TEST_SKIP: "*-win_arm64"
 57 |           CIBW_SKIP: pp* *-musllinux*
 58 |           CIBW_PROJECT_REQUIRES_PYTHON: ">=3.9"
 59 | 
 60 |       - uses: actions/upload-artifact@v4
 61 |         with:
 62 |           name: wheels-${{ matrix.os }}-${{ join(matrix.archs, '-') }}
 63 |           path: ./wheelhouse
 64 |           retention-days: 5
 65 | 
 66 |   build-sdist:
 67 |     name: Build source distribution
 68 |     runs-on: ubuntu-latest
 69 |     steps:
 70 |       - uses: actions/checkout@v4
 71 |         with:
 72 |           submodules: true
 73 | 
 74 |       - name: Build sdist
 75 |         run: pipx run build --sdist
 76 | 
 77 |       - uses: actions/upload-artifact@v4
 78 |         with:
 79 |           name: wheels-sdist
 80 |           path: dist/*.tar.gz
 81 |           retention-days: 5
 82 | 
 83 |   publish-wheels:
 84 |     name: Publish Wheels
 85 |     needs: [build-wheels, build-sdist]
 86 |     runs-on: ubuntu-latest
 87 |     steps:
 88 |       - uses: actions/download-artifact@v4
 89 |         with:
 90 |           pattern: wheels-*
 91 |           merge-multiple: true
 92 |           path: ./wheelhouse
 93 | 
 94 |       - name: Publish to PyPI
 95 |         uses: pypa/gh-action-pypi-publish@release/v1
 96 |         if: ${{ startsWith(github.ref, 'refs/tags') }}
 97 |         with:
 98 |           packages-dir: ./wheelhouse
 99 |           skip-existing: true
100 |           password: ${{ secrets.PYPI_API_TOKEN }}
101 | 


--------------------------------------------------------------------------------
/demo/demo.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division, print_function
  2 | 
  3 | import os
  4 | from shutil import rmtree
  5 | import argparse
  6 | 
  7 | import numpy as np
  8 | 
  9 | import matplotlib      # Remove this line if you don't need them
 10 | matplotlib.use('Agg')  # Remove this line if you don't need them
 11 | import matplotlib.pyplot as plt
 12 | 
 13 | import soundfile as sf
 14 | # import librosa
 15 | import pyworld as pw
 16 | 
 17 | 
 18 | parser = argparse.ArgumentParser()  # command-line options for the demo
 19 | parser.add_argument("-f", "--frame_period", type=float, default=5.0)  # forwarded to pw.dio / pw.synthesize in main()
 20 | parser.add_argument("-s", "--speed", type=int, default=1)  # forwarded to pw.dio in main()
 21 | 
 22 | 
 23 | EPSILON = 1e-8  # small offset savefig() adds to 2-D data before plotting (and before np.log)
 24 | 
 25 | def savefig(filename, figlist, log=True):
 26 |     #h = 10
 27 |     n = len(figlist)
 28 |     # peek into instances
 29 |     f = figlist[0]
 30 |     if len(f.shape) == 1:
 31 |         plt.figure()
 32 |         for i, f in enumerate(figlist):
 33 |             plt.subplot(n, 1, i+1)
 34 |             if len(f.shape) == 1:
 35 |                 plt.plot(f)
 36 |                 plt.xlim([0, len(f)])
 37 |     elif len(f.shape) == 2:
 38 |         Nsmp, dim = figlist[0].shape
 39 |         #figsize=(h * float(Nsmp) / dim, len(figlist) * h)
 40 |         #plt.figure(figsize=figsize)
 41 |         plt.figure()
 42 |         for i, f in enumerate(figlist):
 43 |             plt.subplot(n, 1, i+1)
 44 |             if log:
 45 |                 x = np.log(f + EPSILON)
 46 |             else:
 47 |                 x = f + EPSILON
 48 |             plt.imshow(x.T, origin='lower', interpolation='none', aspect='auto', extent=(0, x.shape[0], 0, x.shape[1]))
 49 |     else:
 50 |         raise ValueError('Input dimension must < 3.')
 51 |     plt.savefig(filename)
 52 |     # plt.close()
 53 | 
 54 | 
 55 | def main(args):
 56 |     if os.path.isdir('test'):
 57 |         rmtree('test')
 58 |     os.mkdir('test')
 59 | 
 60 |     x, fs = sf.read('utterance/vaiueo2d.wav')
 61 |     # x, fs = librosa.load('utterance/vaiueo2d.wav', dtype=np.float64)
 62 | 
 63 |     # 1. A convient way
 64 |     f0, sp, ap = pw.wav2world(x, fs)    # use default options
 65 |     y = pw.synthesize(f0, sp, ap, fs, pw.default_frame_period)
 66 | 
 67 |     # 2. Step by step
 68 |     # 2-1 Without F0 refinement
 69 |     _f0, t = pw.dio(x, fs, f0_floor=50.0, f0_ceil=600.0,
 70 |                     channels_in_octave=2,
 71 |                     frame_period=args.frame_period,
 72 |                     speed=args.speed)
 73 |     _sp = pw.cheaptrick(x, _f0, t, fs)
 74 |     _ap = pw.d4c(x, _f0, t, fs)
 75 |     _y = pw.synthesize(_f0, _sp, _ap, fs, args.frame_period)
 76 |     # librosa.output.write_wav('test/y_without_f0_refinement.wav', _y, fs)
 77 |     sf.write('test/y_without_f0_refinement.wav', _y, fs)
 78 | 
 79 |     # 2-2 DIO with F0 refinement (using Stonemask)
 80 |     f0 = pw.stonemask(x, _f0, t, fs)
 81 |     sp = pw.cheaptrick(x, f0, t, fs)
 82 |     ap = pw.d4c(x, f0, t, fs)
 83 |     y = pw.synthesize(f0, sp, ap, fs, args.frame_period)
 84 |     # librosa.output.write_wav('test/y_with_f0_refinement.wav', y, fs)
 85 |     sf.write('test/y_with_f0_refinement.wav', y, fs)
 86 | 
 87 |     # 2-3 Harvest with F0 refinement (using Stonemask)
 88 |     _f0_h, t_h = pw.harvest(x, fs)
 89 |     f0_h = pw.stonemask(x, _f0_h, t_h, fs)
 90 |     sp_h = pw.cheaptrick(x, f0_h, t_h, fs)
 91 |     ap_h = pw.d4c(x, f0_h, t_h, fs)
 92 |     y_h = pw.synthesize(f0_h, sp_h, ap_h, fs, pw.default_frame_period)
 93 |     # librosa.output.write_wav('test/y_harvest_with_f0_refinement.wav', y_h, fs)
 94 |     sf.write('test/y_harvest_with_f0_refinement.wav', y_h, fs)
 95 | 
 96 |     # Comparison
 97 |     savefig('test/wavform.png', [x, _y, y])
 98 |     savefig('test/sp.png', [_sp, sp])
 99 |     savefig('test/ap.png', [_ap, ap], log=False)
100 |     savefig('test/f0.png', [_f0, f0])
101 | 
102 |     print('Please check "test" directory for output files')
103 | 
104 | 
105 | if __name__ == '__main__':  # run the demo only when executed as a script, not on import
106 |     args = parser.parse_args()  # reads --frame_period / --speed from the command line
107 |     main(args)
108 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # pyworld-prebuilt - A Python wrapper of WORLD Vocoder
  2 | 
  3 | > [!TIP]  
  4 | > This is a prebuilt version of [pyworld](https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder) for multiple platforms.  
  5 | > The only differences from pyworld are the migration away from some deprecated APIs and the addition of type hints.
  6 | 
  7 | WORLD Vocoder is a fast and high-quality vocoder
  8 | which parameterizes speech into three components:
  9 | 
 10 |   1. `f0`: Pitch contour
 11 |   2. `sp`: Harmonic spectral envelope
 12 |   3. `ap`: Aperiodic spectral envelope (relative to the harmonic spectral envelope)
 13 | 
 14 | It can also (re)synthesize speech using these features (see examples below).
 15 | 
 16 | For more information, please visit Dr. Morise's [WORLD repository](https://github.com/mmorise/World)
 17 | and the [official website of WORLD Vocoder](http://ml.cs.yamanashi.ac.jp/world/english)
 18 | 
 19 | 
 20 | ## APIs
 21 | 
 22 | ### Vocoder Functions
 23 | ```python
 24 | import pyworld as pw
 25 | _f0, t = pw.dio(x, fs)    # raw pitch extractor
 26 | f0 = pw.stonemask(x, _f0, t, fs)  # pitch refinement
 27 | sp = pw.cheaptrick(x, f0, t, fs)  # extract smoothed spectrogram
 28 | ap = pw.d4c(x, f0, t, fs)         # extract aperiodicity
 29 | 
 30 | y = pw.synthesize(f0, sp, ap, fs) # synthesize an utterance using the parameters
 31 | ```
 32 | 
 33 | 
 34 | ### Utility
 35 | ```python
 36 | # Convert speech into features (using default arguments)
 37 | f0, sp, ap = pw.wav2world(x, fs)
 38 | ```
 39 | <br/>
 40 | 
 41 | You can change the default arguments of the function, too.
 42 | See more info using `help`.
 43 | 
 44 | 
 45 | ## Installation
 46 | 
 47 | ### Using pip
 48 | `pip install pyworld-prebuilt`
 49 | <br/>
 50 | 
 51 | ### Building from Source
 52 | ```bash
 53 | git clone https://github.com/tsukumijima/pyworld-prebuilt.git
 54 | cd pyworld-prebuilt
 55 | git submodule update --init
 56 | pip install -U pip
 57 | pip install -r requirements.txt
 58 | pip install .
 59 | ```
 60 | The `git submodule update --init` step above fetches Morise's WORLD Vocoder (C++ version) into `lib/World`.<br/>
 61 | (It seems to me that using `virtualenv` or `conda` is the best practice.)<br/>
 62 | <br/>
 63 | 
 64 | ### Installation Validation
 65 | You can validate installation by running
 66 | ```bash
 67 | cd demo
 68 | python demo.py
 69 | ```
 70 | to see if you get results in the `test/` directory.
 71 | (Please avoid writing and executing codes in the `pyworld-prebuilt` folder for now.)<br/>
 72 | 
 73 | ### Environment/Dependencies
 74 | - Operating systems
 75 |   - Linux Ubuntu 14.04+
 76 |   - Windows (thanks to [wuaalb](https://github.com/wuaalb))
 77 |   - WSL
 78 | - Python
 79 |   - 3.7+
 80 | 
 81 | You can install these dependencies with `pip install -r requirements.txt`.
 82 | 
 83 | 
 84 | 
 85 | ## Notice
 86 | - WORLD vocoder is designed for speech sampled ≥ 16 kHz.
 87 |   Applying WORLD to 8 kHz speech will fail.
 88 |   See a possible workaround [here](https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/issues/54).
 89 | - When the SNR is low, extracting pitch using `harvest` instead of `dio`
 90 |   is a better option.
 91 | 
 92 | 
 93 | ## Troubleshooting
 94 | 1. Upgrade your Cython version to 0.24.<br/>
 95 |    (I failed to build it on Cython 0.20.1post0)<br/>
 96 |    It'll require you to download Cython from http://cython.org/<br/>
 97 |    Unzip it, and `python setup.py install` it.<br/>
 98 |    (I tried `pip install Cython` but the upgrade didn't seem correct)<br/>
 99 |    (Again, add `--user` if you don't have root access.)
100 | 2. Upon executing `demo/demo.py`, the following code might be needed in some environments (e.g. when you're working on a remote Linux server):<br/>
101 | 
102 |  ```python
103 |  import matplotlib
104 |  matplotlib.use('Agg')
105 |  ```
106 | 3. If you encounter `library not found: sndfile` error upon executing `demo.py`,
107 |    you might have to install it by `apt-get install libsoundfile1`.
108 |    You can also replace `pysoundfile` with `scipy` or `librosa`, but some modification is needed:
109 |    - librosa:
110 |      - load(filename, dtype=np.float64)
111 |      - output.write_wav(filename, wav, fs)
112 |      - remember to pass `dtype` argument to ensure that the method gives you a `double`.
113 |    - scipy:
114 |      - You'll have to write a customized utility function based on the following methods
115 |      - scipy.io.wavfile.read (but this gives you `short`)
116 |      - scipy.io.wavfile.write
117 | 
118 | 4. If you have installation issue on Windows, I probably could not provide
119 |    much help because my development environment is Ubuntu
120 |    and Windows Subsystem for Linux ([read this if you are interested in installing it](https://github.com/JeremyCCHsu/wsl)).
121 | 
122 | 
123 | ### Other Installation Suggestions
124 | 1. Using `pip install .` is safer, and you can easily uninstall pyworld with `pip uninstall pyworld-prebuilt`.
125 |   - For Mac users: You might need to do `MACOSX_DEPLOYMENT_TARGET=10.9 pip install .` See [issue](https://github.com/SeanNaren/warp-ctc/issues/129#issuecomment-502349652).
126 | 2. Another way to install pyworld is via<br/>
127 |    `python setup.py install`<br/>
128 |    - Add `--user` if you don't have root access<br/>
129 |    - Add `--record install.txt` to track the installation dir<br/>
130 | 3. If you just want to try out some experiments, execute<br/>
131 |   `python setup.py build_ext --inplace`<br/>
132 |   Then you can use PyWorld from this directory.<br/>
133 |   You can also copy the resulting **pyworld.so** (pyworld.{arch}.pyd on Windows) file to
134 |   `~/.local/lib/python2.7/site-packages` (or corresponding Windows directory)
135 |   so that you can use it everywhere like an installed package.<br/>
136 |   Alternatively you can copy/symlink the compiled files using pip, e.g. `pip install -e .`
137 | 
138 | 
139 | 
140 | ## Acknowledgement
141 | Thanks to all contributors ([tats-u](https://github.com/tats-u), [wuaalb](https://github.com/wuaalb), [r9y9](https://github.com/r9y9), [rikrd](https://github.com/rikrd), [kundan2510](https://github.com/kundan2510)) for making this repo better, and to [sotelo](https://github.com/sotelo), whose [world.py](https://github.com/sotelo/world.py) inspired this repo.<br/>
142 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | test/
  2 | *.cpp
  3 | *.pyd
  4 | .pypirc
  5 | .vscode
  6 | 
  7 | # Created by https://www.gitignore.io
  8 | 
  9 | ### Python ###
 10 | # Byte-compiled / optimized / DLL files
 11 | __pycache__/
 12 | *.py[cod]
 13 | 
 14 | # C extensions
 15 | *.so
 16 | 
 17 | # Distribution / packaging
 18 | .Python
 19 | env/
 20 | build/
 21 | develop-eggs/
 22 | dist/
 23 | downloads/
 24 | eggs/
 25 | lib/
 26 | lib64/
 27 | parts/
 28 | sdist/
 29 | var/
 30 | *.egg-info/
 31 | .installed.cfg
 32 | *.egg
 33 | 
 34 | # PyInstaller
 35 | #  Usually these files are written by a python script from a template
 36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 37 | *.manifest
 38 | *.spec
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .coverage
 48 | .cache
 49 | nosetests.xml
 50 | coverage.xml
 51 | 
 52 | # Translations
 53 | *.mo
 54 | *.pot
 55 | 
 56 | # Django stuff:
 57 | *.log
 58 | 
 59 | # Sphinx documentation
 60 | docs/_build/
 61 | 
 62 | # PyBuilder
 63 | target/
 64 | 
 65 | 
 66 | ### IPythonNotebook ###
 67 | # Temporary data
 68 | .ipynb_checkpoints/
 69 | 
 70 | 
 71 | ### SublimeText ###
 72 | # cache files for sublime text
 73 | *.tmlanguage.cache
 74 | *.tmPreferences.cache
 75 | *.stTheme.cache
 76 | 
 77 | # workspace files are user-specific
 78 | *.sublime-workspace
 79 | 
 80 | # project files should be checked into the repository, unless a significant
 81 | # proportion of contributors will probably not be using SublimeText
 82 | # *.sublime-project
 83 | 
 84 | # sftp configuration file
 85 | sftp-config.json
 86 | 
 87 | 
 88 | ### Emacs ###
 89 | # -*- mode: gitignore; -*-
 90 | *~
 91 | \#*\#
 92 | /.emacs.desktop
 93 | /.emacs.desktop.lock
 94 | *.elc
 95 | auto-save-list
 96 | tramp
 97 | .\#*
 98 | 
 99 | # Org-mode
100 | .org-id-locations
101 | *_archive
102 | 
103 | # flymake-mode
104 | *_flymake.*
105 | 
106 | # eshell files
107 | /eshell/history
108 | /eshell/lastdir
109 | 
110 | # elpa packages
111 | /elpa/
112 | 
113 | # reftex files
114 | *.rel
115 | 
116 | # AUCTeX auto folder
117 | /auto/
118 | 
119 | # cask packages
120 | .cask/
121 | 
122 | 
123 | ### Vim ###
124 | [._]*.s[a-w][a-z]
125 | [._]s[a-w][a-z]
126 | *.un~
127 | Session.vim
128 | .netrwhist
129 | *~
130 | 
131 | 
132 | ### C++ ###
133 | # Compiled Object files
134 | *.slo
135 | *.lo
136 | *.o
137 | *.obj
138 | 
139 | # Precompiled Headers
140 | *.gch
141 | *.pch
142 | 
143 | # Compiled Dynamic libraries
144 | *.so
145 | *.dylib
146 | *.dll
147 | 
148 | # Fortran module files
149 | *.mod
150 | 
151 | # Compiled Static libraries
152 | *.lai
153 | *.la
154 | *.a
155 | *.lib
156 | 
157 | # Executables
158 | *.exe
159 | *.out
160 | *.app
161 | 
162 | 
163 | ### OSX ###
164 | .DS_Store
165 | .AppleDouble
166 | .LSOverride
167 | 
168 | # Icon must end with two \r
169 | Icon
170 | 
171 | 
172 | # Thumbnails
173 | ._*
174 | 
175 | # Files that might appear on external disk
176 | .Spotlight-V100
177 | .Trashes
178 | 
179 | # Directories potentially created on remote AFP share
180 | .AppleDB
181 | .AppleDesktop
182 | Network Trash Folder
183 | Temporary Items
184 | .apdisk
185 | 
186 | 
187 | ### Linux ###
188 | *~
189 | 
190 | # KDE directory preferences
191 | .directory
192 | 
193 | # Linux trash folder which might appear on any partition or disk
194 | .Trash-*
195 | test/
196 | *.cpp
197 | *.pyd
198 | .pypirc
199 | .vscode
200 | 
201 | # Created by https://www.gitignore.io
202 | 
203 | ### Python ###
204 | # Byte-compiled / optimized / DLL files
205 | __pycache__/
206 | *.py[cod]
207 | 
208 | # C extensions
209 | *.so
210 | 
211 | # Distribution / packaging
212 | .Python
213 | env/
214 | build/
215 | develop-eggs/
216 | dist/
217 | downloads/
218 | eggs/
219 | lib/
220 | lib64/
221 | parts/
222 | sdist/
223 | var/
224 | *.egg-info/
225 | .installed.cfg
226 | *.egg
227 | 
228 | # PyInstaller
229 | #  Usually these files are written by a python script from a template
230 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
231 | *.manifest
232 | *.spec
233 | 
234 | # Installer logs
235 | pip-log.txt
236 | pip-delete-this-directory.txt
237 | 
238 | # Unit test / coverage reports
239 | htmlcov/
240 | .tox/
241 | .coverage
242 | .cache
243 | nosetests.xml
244 | coverage.xml
245 | 
246 | # Translations
247 | *.mo
248 | *.pot
249 | 
250 | # Django stuff:
251 | *.log
252 | 
253 | # Sphinx documentation
254 | docs/_build/
255 | 
256 | # PyBuilder
257 | target/
258 | 
259 | 
260 | ### IPythonNotebook ###
261 | # Temporary data
262 | .ipynb_checkpoints/
263 | 
264 | 
265 | ### SublimeText ###
266 | # cache files for sublime text
267 | *.tmlanguage.cache
268 | *.tmPreferences.cache
269 | *.stTheme.cache
270 | 
271 | # workspace files are user-specific
272 | *.sublime-workspace
273 | 
274 | # project files should be checked into the repository, unless a significant
275 | # proportion of contributors will probably not be using SublimeText
276 | # *.sublime-project
277 | 
278 | # sftp configuration file
279 | sftp-config.json
280 | 
281 | 
282 | ### Emacs ###
283 | # -*- mode: gitignore; -*-
284 | *~
285 | \#*\#
286 | /.emacs.desktop
287 | /.emacs.desktop.lock
288 | *.elc
289 | auto-save-list
290 | tramp
291 | .\#*
292 | 
293 | # Org-mode
294 | .org-id-locations
295 | *_archive
296 | 
297 | # flymake-mode
298 | *_flymake.*
299 | 
300 | # eshell files
301 | /eshell/history
302 | /eshell/lastdir
303 | 
304 | # elpa packages
305 | /elpa/
306 | 
307 | # reftex files
308 | *.rel
309 | 
310 | # AUCTeX auto folder
311 | /auto/
312 | 
313 | # cask packages
314 | .cask/
315 | 
316 | 
317 | ### Vim ###
318 | [._]*.s[a-w][a-z]
319 | [._]s[a-w][a-z]
320 | *.un~
321 | Session.vim
322 | .netrwhist
323 | *~
324 | 
325 | 
326 | ### C++ ###
327 | # Compiled Object files
328 | *.slo
329 | *.lo
330 | *.o
331 | *.obj
332 | 
333 | # Precompiled Headers
334 | *.gch
335 | *.pch
336 | 
337 | # Compiled Dynamic libraries
338 | *.so
339 | *.dylib
340 | *.dll
341 | 
342 | # Fortran module files
343 | *.mod
344 | 
345 | # Compiled Static libraries
346 | *.lai
347 | *.la
348 | *.a
349 | *.lib
350 | 
351 | # Executables
352 | *.exe
353 | *.out
354 | *.app
355 | 
356 | 
357 | ### OSX ###
358 | .DS_Store
359 | .AppleDouble
360 | .LSOverride
361 | 
362 | # Icon must end with two \r
363 | Icon
364 | 
365 | 
366 | # Thumbnails
367 | ._*
368 | 
369 | # Files that might appear on external disk
370 | .Spotlight-V100
371 | .Trashes
372 | 
373 | # Directories potentially created on remote AFP share
374 | .AppleDB
375 | .AppleDesktop
376 | Network Trash Folder
377 | Temporary Items
378 | .apdisk
379 | 
380 | 
381 | ### Linux ###
382 | *~
383 | 
384 | # KDE directory preferences
385 | .directory
386 | 
387 | # Linux trash folder which might appear on any partition or disk
388 | .Trash-*
389 | 


--------------------------------------------------------------------------------
/pyworld/pyworld.pyx:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | import cython
  3 | 
  4 | import numpy as np
  5 | cimport numpy as np
  6 | np.import_array()
  7 | 
  8 | 
  9 | cdef extern from "world/synthesis.h":  # declares WORLD's Synthesis() from the C++ library
 10 |     void Synthesis(const double *f0,
 11 |         int f0_length, const double * const *spectrogram,
 12 |         const double * const *aperiodicity,
 13 |         int fft_size, double frame_period,
 14 |         int fs, int y_length, double *y) except + nogil  # C++ exceptions are translated to Python ones; callable without the GIL
 15 | 
 16 | 
 17 | cdef extern from "world/cheaptrick.h":  # declares WORLD's CheapTrick API from the C++ library
 18 |     ctypedef struct CheapTrickOption:  # option struct taken by CheapTrick() and GetFFTSizeForCheapTrick()
 19 |         double q1
 20 |         double f0_floor
 21 |         int fft_size
 22 | 
 23 |     int GetFFTSizeForCheapTrick(int fs, const CheapTrickOption *option) except +
 24 |     double GetF0FloorForCheapTrick(int fs, int fft_size) except +
 25 |     void InitializeCheapTrickOption(int fs, CheapTrickOption *option) except +
 26 |     void CheapTrick(const double *x, int x_length, int fs, const double *temporal_positions,
 27 |         const double *f0, int f0_length, const CheapTrickOption *option,
 28 |         double **spectrogram) except + nogil  # only non-const pointer: presumably the output buffer
 29 | 
 30 | 
 31 | cdef extern from "world/dio.h":  # declares WORLD's DIO API from the C++ library
 32 |     ctypedef struct DioOption:  # field names match the keyword arguments of this module's dio()
 33 |         double f0_floor
 34 |         double f0_ceil
 35 |         double channels_in_octave
 36 |         double frame_period
 37 |         int speed
 38 |         double allowed_range
 39 | 
 40 |     void InitializeDioOption(DioOption *option) except +
 41 |     int GetSamplesForDIO(int fs, int x_length, double frame_period)  # declared without "except +", unlike its neighbours
 42 |     void Dio(const double *x, int x_length, int fs, const DioOption *option,
 43 |         double *temporal_positions, double *f0) except + nogil  # non-const pointers: presumably output buffers
 44 | 
 45 | 
 46 | cdef extern from "world/harvest.h":  # declares WORLD's Harvest API from the C++ library
 47 |     ctypedef struct HarvestOption:  # option struct taken by Harvest()
 48 |         double f0_floor
 49 |         double f0_ceil
 50 |         double frame_period
 51 | 
 52 |     void InitializeHarvestOption(HarvestOption *option)  # declared without "except +"
 53 |     int GetSamplesForHarvest(int fs, int x_length, double frame_period)  # declared without "except +"
 54 |     void Harvest(const double *x, int x_length, int fs, const HarvestOption *option,
 55 |         double *temporal_positions, double *f0) except + nogil  # same parameter layout as Dio(), with a HarvestOption
 56 | 
 57 | 
 58 | cdef extern from "world/d4c.h":  # declares WORLD's D4C API from the C++ library
 59 |     ctypedef struct D4COption:  # option struct taken by D4C()
 60 |         double threshold
 61 | 
 62 |     void InitializeD4COption(D4COption *option) except +
 63 |     void D4C(const double *x, int x_length, int fs, const double *temporal_positions,
 64 |         const double *f0, int f0_length, int fft_size, const D4COption *option,
 65 |         double **aperiodicity) except + nogil  # only non-const pointer: presumably the output buffer
 66 | 
 67 | 
# Declaration of the StoneMask F0 refinement routine from the WORLD header "world/stonemask.h".
cdef extern from "world/stonemask.h":
    # Writes the refined contour into the caller-allocated `refined_f0`.
    # Only `f0_length` is passed as the length of `temporal_positions`, `f0`
    # and `refined_f0`, so all three must hold at least that many entries.
    # Declared nogil so the wrapper can call it with the GIL released.
    void StoneMask(const double *x, int x_length, int fs,
        const double *temporal_positions, const double *f0, int f0_length,
        double *refined_f0) except + nogil
 72 | 
 73 | 
# Declarations of the parameter coding/decoding routines from the WORLD header "world/codec.h".
# In every Code*/Decode* function the 2-D data is passed as a table of row
# pointers (one per frame, `f0_length` rows); the last argument is the
# caller-allocated output.
cdef extern from "world/codec.h":
    # Number of coefficients per frame of a coded aperiodicity at sample rate `fs`.
    int GetNumberOfAperiodicities(int fs)
    # Full aperiodicity -> coded aperiodicity.
    void CodeAperiodicity(const double * const *aperiodicity, int f0_length,
        int fs, int fft_size, double **coded_aperiodicity) except +
    # Coded aperiodicity -> full aperiodicity for the given `fft_size`.
    void DecodeAperiodicity(const double * const *coded_aperiodicity,
        int f0_length, int fs, int fft_size, double **aperiodicity) except +
    # Spectral envelope -> coded envelope with `number_of_dimensions` coefficients per frame.
    void CodeSpectralEnvelope(const double * const *spectrogram, int f0_length,
        int fs, int fft_size, int number_of_dimensions,
        double **coded_spectral_envelope) except +
    # Coded envelope -> spectral envelope for the given `fft_size`.
    void DecodeSpectralEnvelope(const double * const *coded_spectral_envelope,
        int f0_length, int fs, int fft_size, int number_of_dimensions,
        double **spectrogram) except +
 86 | 
 87 | 
# Module-level defaults used as keyword-argument defaults by the functions below.
# Period between consecutive frames, in milliseconds.
default_frame_period = 5.0
# Lower F0 limit, in Hz.
default_f0_floor = 71.0
# Upper F0 limit, in Hz.
default_f0_ceil = 800.0
 91 | 
 92 | 
 93 | def dio(np.ndarray[double, ndim=1, mode="c"] x not None, int fs,
 94 |         f0_floor=default_f0_floor, f0_ceil=default_f0_ceil,
 95 |         channels_in_octave=2.0, frame_period=default_frame_period,
 96 |         speed=1, allowed_range=0.1):
 97 |     """DIO F0 extraction algorithm.
 98 | 
 99 |     Parameters
100 |     ----------
101 |     x : ndarray
102 |         Input waveform signal.
103 |     fs : int
104 |         Sample rate of input signal in Hz.
105 |     f0_floor : float
106 |         Lower F0 limit in Hz.
107 |         Default: 71.0
108 |     f0_ceil : float
109 |         Upper F0 limit in Hz.
110 |         Default: 800.0
111 |     channels_in_octave : float
112 |         Resolution of multiband processing; normally shouldn't be changed.
113 |         Default: 2.0
114 |     frame_period : float
115 |         Period between consecutive frames in milliseconds.
116 |         Default: 5.0
117 |     speed : int
118 |         The F0 estimator may downsample the input signal using this integer factor
119 |         (range [1;12]). The algorithm will then operate on a signal at fs/speed Hz
120 |         to reduce computational complexity, but high values may negatively impact
121 |         accuracy.
122 |         Default: 1 (no downsampling)
123 |     allowed_range : float
124 |         Threshold for voiced/unvoiced decision. Can be any value >= 0, but 0.02 to 0.2
125 |         is a reasonable range. Lower values will cause more frames to be considered
126 |         unvoiced (in the extreme case of `threshold=0`, almost all frames will be unvoiced).
127 |         Default: 0.1
128 | 
129 |     Returns
130 |     -------
131 |     f0 : ndarray
132 |         Estimated F0 contour.
133 |     temporal_positions : ndarray
134 |         Temporal position of each frame.
135 |     """
136 |     cdef int x_length = <int>len(x)
137 |     cdef DioOption option
138 |     InitializeDioOption(&option)
139 |     option.channels_in_octave = channels_in_octave
140 |     option.f0_floor = f0_floor
141 |     option.f0_ceil = f0_ceil
142 |     option.frame_period = frame_period
143 |     option.speed = speed
144 |     option.allowed_range = allowed_range
145 |     f0_length = GetSamplesForDIO(fs, x_length, option.frame_period)
146 |     cdef np.ndarray[double, ndim=1, mode="c"] f0 = \
147 |         np.zeros(f0_length, dtype=np.dtype('float64'))
148 |     cdef np.ndarray[double, ndim=1, mode="c"] temporal_positions = \
149 |         np.zeros(f0_length, dtype=np.dtype('float64'))
150 |     with (nogil, cython.boundscheck(False)):
151 |         Dio(&x[0], x_length, fs, &option, &temporal_positions[0], &f0[0])
152 |     return f0, temporal_positions
153 | 
154 | 
def harvest(np.ndarray[double, ndim=1, mode="c"] x not None, int fs,
            f0_floor=default_f0_floor, f0_ceil=default_f0_ceil,
            frame_period=default_frame_period):
    """Estimate an F0 contour with the Harvest algorithm.

    Parameters
    ----------
    x : ndarray
        Input waveform signal (1-D, C-contiguous, float64).
    fs : int
        Sample rate of input signal in Hz.
    f0_floor : float
        Lower F0 limit in Hz.
        Default: 71.0
    f0_ceil : float
        Upper F0 limit in Hz.
        Default: 800.0
    frame_period : float
        Period between consecutive frames in milliseconds.
        Default: 5.0

    Returns
    -------
    f0 : ndarray
        Estimated F0 contour.
    temporal_positions : ndarray
        Temporal position of each frame.
    """
    cdef int n_samples = <int>len(x)

    # Start from the library's initialization, then apply the caller's settings.
    cdef HarvestOption opt
    InitializeHarvestOption(&opt)
    opt.frame_period = frame_period
    opt.f0_ceil = f0_ceil
    opt.f0_floor = f0_floor

    # The library reports how many frames it will produce; both outputs
    # are allocated with exactly that many entries.
    cdef int n_frames = GetSamplesForHarvest(fs, n_samples, opt.frame_period)
    cdef np.ndarray[double, ndim=1, mode="c"] temporal_positions = \
        np.zeros(n_frames, dtype=np.float64)
    cdef np.ndarray[double, ndim=1, mode="c"] f0 = \
        np.zeros(n_frames, dtype=np.float64)

    # The C routine needs no Python objects, so it runs with the GIL released.
    with (nogil, cython.boundscheck(False)):
        Harvest(&x[0], n_samples, fs, &opt, &temporal_positions[0], &f0[0])
    return f0, temporal_positions
197 | 
198 | 
def stonemask(np.ndarray[double, ndim=1, mode="c"] x not None,
              np.ndarray[double, ndim=1, mode="c"] f0 not None,
              np.ndarray[double, ndim=1, mode="c"] temporal_positions not None,
              int fs):
    """StoneMask F0 refinement algorithm.

    Parameters
    ----------
    x : ndarray
        Input waveform signal.
    f0 : ndarray
        Input F0 contour.
    temporal_positions : ndarray
        Temporal positions of each frame. Must provide at least one entry
        per F0 frame.
    fs : int
        Sample rate of input signal in Hz.

    Returns
    -------
    refined_f0 : ndarray
        Refined F0 contour, same length as `f0`.

    Raises
    ------
    ValueError
        If `temporal_positions` has fewer entries than `f0`.
    """
    # StoneMask() is only told len(f0); without this check a shorter
    # temporal_positions array would be read past its end by the C code.
    if temporal_positions.shape[0] < f0.shape[0]:
        raise ValueError('temporal_positions has fewer frames ({:d}) '
                         'than f0 ({:d})'
                         .format(temporal_positions.shape[0], f0.shape[0]))

    cdef int x_length = <int>len(x)
    cdef int f0_length = <int>len(f0)
    cdef np.ndarray[double, ndim=1, mode="c"] refined_f0 = \
        np.zeros(f0_length, dtype=np.dtype('float64'))
    # The C routine needs no Python objects, so it runs with the GIL released.
    with (nogil, cython.boundscheck(False)):
        StoneMask(&x[0], x_length, fs, &temporal_positions[0],
            &f0[0], f0_length, &refined_f0[0])
    return refined_f0
229 | 
230 | 
def get_cheaptrick_fft_size(int fs, f0_floor=default_f0_floor):
    """Return the FFT size CheapTrick needs for a given F0 floor.

    Parameters
    ----------
    fs : int
        Sample rate of input signal in Hz.
    f0_floor : float
        Lower F0 limit in Hz. The required FFT size is a direct
        consequence of the F0 floor used.
        Default: 71.0

    Returns
    -------
    fft_size : int
        Resulting FFT size.
    """
    # Only f0_floor is filled in before the query; the other fields are left
    # untouched (presumably the library reads just this one field here).
    cdef CheapTrickOption opt
    opt.f0_floor = f0_floor
    return GetFFTSizeForCheapTrick(fs, &opt)
252 | 
def get_cheaptrick_f0_floor(int fs, int fft_size):
    """Return the effective lower F0 limit of CheapTrick.

    The limit follows from the sampling frequency and the FFT size used.
    Whenever F0 is below this threshold the spectrum will be analyzed as if
    the frame is unvoiced (using kDefaultF0 defined in constantnumbers.h).

    Parameters
    ----------
    fs : int
        Sample rate of input signal in Hz.
    fft_size : int
        FFT size used for CheapTrick.

    Returns
    -------
    f0_floor : float
        Resulting lower F0 limit in Hz.
    """
    return GetF0FloorForCheapTrick(fs, fft_size)
273 | 
def cheaptrick(np.ndarray[double, ndim=1, mode="c"] x not None,
               np.ndarray[double, ndim=1, mode="c"] f0 not None,
               np.ndarray[double, ndim=1, mode="c"] temporal_positions not None,
               int fs,
               q1=-0.15, f0_floor=default_f0_floor, fft_size=None):
    """CheapTrick harmonic spectral envelope estimation algorithm.

    Parameters
    ----------
    x : ndarray
        Input waveform signal.
    f0 : ndarray
        Input F0 contour.
    temporal_positions : ndarray
        Temporal positions of each frame. Must provide at least one entry
        per F0 frame.
    fs : int
        Sample rate of input signal in Hz.
    q1 : float
        Spectral recovery parameter.
        Default: -0.15 (this value was tuned and normally does not need adjustment)
    f0_floor : float, None
        Lower F0 limit in Hz. Not used in case `fft_size` is specified.
        Default: 71.0
    fft_size : int, None
        FFT size to be used. When `None` (default) is used, the FFT size is computed
        automatically as a function of the given input sample rate and F0 floor.
        When `fft_size` is specified, the given `f0_floor` parameter is ignored.
        Default: None

    Returns
    -------
    spectrogram : ndarray
        Spectral envelope (squared magnitude), shape
        `(len(f0), fft_size // 2 + 1)`.

    Raises
    ------
    ValueError
        If `temporal_positions` has fewer entries than `f0`.
    """
    # CheapTrick() is only told len(f0); without this check a shorter
    # temporal_positions array would be read past its end by the C code.
    if temporal_positions.shape[0] < f0.shape[0]:
        raise ValueError('temporal_positions has fewer frames ({:d}) '
                         'than f0 ({:d})'
                         .format(temporal_positions.shape[0], f0.shape[0]))

    cdef CheapTrickOption option
    InitializeCheapTrickOption(fs, &option)
    option.q1 = q1
    if fft_size is None:
        option.f0_floor = f0_floor  # CheapTrickOption.f0_floor is only used in GetFFTSizeForCheapTrick()
        option.fft_size = GetFFTSizeForCheapTrick(fs, &option)
    else:
        option.fft_size = fft_size
        # the f0_floor used by CheapTrick() will be re-computed from this given fft_size
    cdef int x_length = <int>len(x)
    cdef int f0_length = <int>len(f0)

    # Allocate the result as an ndarray and let the C code write into it
    # through a memoryview; the ndarray itself is returned, so no copy of
    # the (potentially large) spectrogram is made.
    result = np.zeros((f0_length, option.fft_size//2 + 1),
                      dtype=np.dtype('float64'))
    cdef double[:, ::1] spectrogram = result
    # `tmp` is backing storage for the table of row pointers the C API
    # expects; this relies on np.intp being pointer-sized.
    cdef np.intp_t[:] tmp = np.zeros(f0_length, dtype=np.intp)
    cdef double **cpp_spectrogram = <double**> (<void*> &tmp[0])
    cdef np.intp_t i
    with (nogil, cython.boundscheck(False)):
        for i in range(f0_length):
            cpp_spectrogram[i] = &spectrogram[i, 0]

        CheapTrick(&x[0], x_length, fs, &temporal_positions[0],
            &f0[0], f0_length, &option, cpp_spectrogram)
    return result
332 | 
333 | 
def d4c(np.ndarray[double, ndim=1, mode="c"] x not None,
        np.ndarray[double, ndim=1, mode="c"] f0 not None,
        np.ndarray[double, ndim=1, mode="c"] temporal_positions not None,
        int fs,
        threshold=0.85, fft_size=None):
    """D4C aperiodicity estimation algorithm.

    Parameters
    ----------
    x : ndarray
        Input waveform signal.
    f0 : ndarray
        Input F0 contour.
    temporal_positions : ndarray
        Temporal positions of each frame. Must provide at least one entry
        per F0 frame.
    fs : int
        Sample rate of input signal in Hz.
    threshold : float
        Threshold for aperiodicity-based voiced/unvoiced decision, in range 0 to 1.
        If a value of 0 is used, voiced frames will be kept voiced. If a value > 0 is
        used some voiced frames can be considered unvoiced by setting their aperiodicity
        to 1 (thus synthesizing them with white noise). Using `threshold=0` will result
        in the behavior of older versions of D4C. The current default of 0.85 is meant
        to be used in combination with the Harvest F0 estimator, which was designed to have
        a high voiced/unvoiced threshold (i.e. most frames will be considered voiced).
        Default: 0.85
    fft_size : int, None
        FFT size to be used. When `None` (default) is used, the FFT size is computed
        automatically as a function of the given input sample rate and the default F0 floor.
        When `fft_size` is specified, it should match the FFT size used to compute
        the spectral envelope (i.e. `fft_size=2*(sp.shape[1] - 1)`) in order to get the
        desired results when resynthesizing.
        Default: None

    Returns
    -------
    aperiodicity : ndarray
        Aperiodicity (envelope, linear magnitude relative to spectral envelope),
        shape `(len(f0), fft_size // 2 + 1)`.

    Raises
    ------
    ValueError
        If `temporal_positions` has fewer entries than `f0`.
    """
    # D4C() is only told len(f0); without this check a shorter
    # temporal_positions array would be read past its end by the C code.
    if temporal_positions.shape[0] < f0.shape[0]:
        raise ValueError('temporal_positions has fewer frames ({:d}) '
                         'than f0 ({:d})'
                         .format(temporal_positions.shape[0], f0.shape[0]))

    cdef int x_length = <int>len(x)
    cdef int f0_length = <int>len(f0)
    cdef int fft_size0
    if fft_size is None:
        # Same FFT size CheapTrick picks for the default F0 floor.
        fft_size0 = get_cheaptrick_fft_size(fs, default_f0_floor)
    else:
        fft_size0 = fft_size

    cdef D4COption option
    InitializeD4COption(&option)
    option.threshold = threshold

    # Allocate the result as an ndarray and let the C code write into it
    # through a memoryview; the ndarray itself is returned, so no copy of
    # the (potentially large) aperiodicity matrix is made.
    result = np.zeros((f0_length, fft_size0//2 + 1),
                      dtype=np.dtype('float64'))
    cdef double[:, ::1] aperiodicity = result
    # `tmp` is backing storage for the table of row pointers the C API
    # expects; this relies on np.intp being pointer-sized.
    cdef np.intp_t[:] tmp = np.zeros(f0_length, dtype=np.intp)
    cdef double **cpp_aperiodicity = <double**> (<void*> &tmp[0])
    cdef np.intp_t i
    with (nogil, cython.boundscheck(False)):
        for i in range(f0_length):
            cpp_aperiodicity[i] = &aperiodicity[i, 0]

        D4C(&x[0], x_length, fs, &temporal_positions[0],
            &f0[0], f0_length, fft_size0, &option,
            cpp_aperiodicity)
    return result
401 | 
402 | 
def synthesize(np.ndarray[double, ndim=1, mode="c"] f0 not None,
               np.ndarray[double, ndim=2, mode="c"] spectrogram not None,
               np.ndarray[double, ndim=2, mode="c"] aperiodicity not None,
               int fs,
               double frame_period=default_frame_period):
    """Synthesize a waveform from WORLD parameters.

    Parameters
    ----------
    f0 : ndarray
        Input F0 contour.
    spectrogram : ndarray
        Spectral envelope.
    aperiodicity : ndarray
        Aperiodicity envelope.
    fs : int
        Sample rate of input signal in Hz.
    frame_period : float
        Period between consecutive frames in milliseconds.
        Default: 5.0

    Returns
    -------
    y : ndarray
        Output waveform signal.

    Raises
    ------
    ValueError
        If the three inputs disagree on the number of frames, or if
        `spectrogram` and `aperiodicity` differ in their second dimension.
    """
    # All three inputs must describe the same frames ...
    frames_match = (f0.shape[0] == spectrogram.shape[0] and
                    f0.shape[0] == aperiodicity.shape[0])
    if not frames_match:
        raise ValueError('Mismatched number of frames between F0 ({:d}), '
                         'spectrogram ({:d}) and aperiodicty ({:d})'
                         .format(f0.shape[0], spectrogram.shape[0],
                                 aperiodicity.shape[0]))
    # ... and both envelopes must have the same number of frequency bins.
    if spectrogram.shape[1] != aperiodicity.shape[1]:
        raise ValueError('Mismatched dimensionality (spec size) between '
                         'spectrogram ({:d}) and aperiodicity ({:d})'
                         .format(spectrogram.shape[1], aperiodicity.shape[1]))

    cdef int n_frames = <int>len(f0)
    # The FFT size is recovered from the bin count (bins = fft_size/2 + 1).
    cdef int fft_size = (<int>spectrogram.shape[1] - 1)*2
    cdef int n_out = <int>(n_frames * frame_period * fs / 1000)
    cdef np.ndarray[double, ndim=1, mode="c"] y = \
        np.zeros(n_out, dtype=np.float64)

    cdef double[:, ::1] sp_view = spectrogram
    cdef double[:, ::1] ap_view = aperiodicity
    # Integer arrays serve as backing storage for the row-pointer tables
    # handed to the C API; this relies on np.intp being pointer-sized.
    cdef np.intp_t[:] sp_rows = np.zeros(n_frames, dtype=np.intp)
    cdef np.intp_t[:] ap_rows = np.zeros(n_frames, dtype=np.intp)
    cdef double **sp_ptrs = <double**> (<void*> &sp_rows[0])
    cdef double **ap_ptrs = <double**> (<void*> &ap_rows[0])
    cdef np.intp_t row
    with (nogil, cython.boundscheck(False)):
        for row in range(n_frames):
            sp_ptrs[row] = &sp_view[row, 0]
            ap_ptrs[row] = &ap_view[row, 0]

        Synthesis(&f0[0], n_frames, sp_ptrs, ap_ptrs,
            fft_size, frame_period, fs, n_out, &y[0])
    return y
461 | 
462 | 
def get_num_aperiodicities(fs):
    """Return the number of coefficients of a coded D4C aperiodicity.

    Parameters
    ----------
    fs : int
        Sample rate of input signal in Hz.

    Returns
    -------
    n_aper : int
        Required number of coefficients.
    """
    return GetNumberOfAperiodicities(fs)
478 | 
def code_aperiodicity(np.ndarray[double, ndim=2, mode="c"] aperiodicity, fs):
    """Compress a D4C aperiodicity envelope to its coded form.

    Parameters
    ----------
    aperiodicity : ndarray
        Aperiodicity envelope.
    fs : int
        Sample rate of input signal in Hz.

    Returns
    -------
    coded_aperiodicity : ndarray
        Coded aperiodicity envelope.
    """
    cdef int n_frames = <int>len(aperiodicity)
    # The FFT size is recovered from the bin count (bins = fft_size/2 + 1).
    cdef int fft_size = (<int>aperiodicity.shape[1] - 1)*2
    cdef int n_coded = get_num_aperiodicities(fs)

    cdef double[:, ::1] full_view = aperiodicity
    cdef double[:, ::1] coded_view = np.zeros((n_frames, n_coded),
                                              dtype=np.float64)
    # Integer arrays serve as backing storage for the row-pointer tables
    # handed to the C API; this relies on np.intp being pointer-sized.
    cdef np.intp_t[:] full_rows = np.zeros(n_frames, dtype=np.intp)
    cdef np.intp_t[:] coded_rows = np.zeros(n_frames, dtype=np.intp)
    cdef double **full_ptrs = <double**> (<void*> &full_rows[0])
    cdef double **coded_ptrs = <double**> (<void*> &coded_rows[0])
    cdef np.intp_t row
    for row in range(n_frames):
        full_ptrs[row] = &full_view[row, 0]
        coded_ptrs[row] = &coded_view[row, 0]

    CodeAperiodicity(full_ptrs, n_frames, fs, fft_size, coded_ptrs)

    return np.array(coded_view, dtype=np.float64)
514 | 
def decode_aperiodicity(np.ndarray[double, ndim=2, mode="c"] coded_aperiodicity,
                        fs, fft_size):
    """Expand a coded D4C aperiodicity back to full dimensionality.

    Parameters
    ----------
    coded_aperiodicity : ndarray
        Coded aperiodicity envelope.
    fs : int
        Sample rate of input signal in Hz.
    fft_size : int
        FFT size corresponding to the full dimensional aperiodicity.

    Returns
    -------
    aperiodicity : ndarray
        Aperiodicity envelope.

    Raises
    ------
    ValueError
        If the second dimension of `coded_aperiodicity` is not the number
        of coefficients expected for `fs`.
    """
    cdef int n_frames = <int>len(coded_aperiodicity)
    cdef int n_expected = get_num_aperiodicities(fs)
    # The code width is fixed by the sample rate; reject anything else.
    if coded_aperiodicity.shape[1] != n_expected:
        raise ValueError('Invalid aperiodicity code dimensionality '
                         '(was: {:d}, expected: {:d})'
                         .format(coded_aperiodicity.shape[1], n_expected))

    cdef double[:, ::1] coded_view = coded_aperiodicity
    cdef double[:, ::1] full_view = np.zeros((n_frames, fft_size//2 + 1),
                                             dtype=np.float64)
    # Integer arrays serve as backing storage for the row-pointer tables
    # handed to the C API; this relies on np.intp being pointer-sized.
    cdef np.intp_t[:] coded_rows = np.zeros(n_frames, dtype=np.intp)
    cdef np.intp_t[:] full_rows = np.zeros(n_frames, dtype=np.intp)
    cdef double **coded_ptrs = <double**> (<void*> &coded_rows[0])
    cdef double **full_ptrs = <double**> (<void*> &full_rows[0])
    cdef np.intp_t row
    for row in range(n_frames):
        coded_ptrs[row] = &coded_view[row, 0]
        full_ptrs[row] = &full_view[row, 0]

    DecodeAperiodicity(coded_ptrs, n_frames, fs, fft_size, full_ptrs)

    return np.array(full_view, dtype=np.float64)
555 | 
def code_spectral_envelope(np.ndarray[double, ndim=2, mode="c"] spectrogram, fs,
                           number_of_dimensions):
    """Compress a spectral envelope to a lower-dimensional coded form.

    Parameters
    ----------
    spectrogram : ndarray
        Spectral envelope.
    fs : int
        Sample rate of input signal in Hz.
    number_of_dimensions : int
        Number of dimensions of the coded spectral envelope.

    Returns
    -------
    coded_spectral_envelope : ndarray
        Coded spectral envelope.
    """
    cdef int n_frames = <int>len(spectrogram)
    # The FFT size is recovered from the bin count (bins = fft_size/2 + 1).
    cdef int fft_size = (<int>spectrogram.shape[1] - 1)*2

    cdef double[:, ::1] full_view = spectrogram
    cdef double[:, ::1] coded_view = np.zeros((n_frames, number_of_dimensions),
                                              dtype=np.float64)
    # Integer arrays serve as backing storage for the row-pointer tables
    # handed to the C API; this relies on np.intp being pointer-sized.
    cdef np.intp_t[:] full_rows = np.zeros(n_frames, dtype=np.intp)
    cdef np.intp_t[:] coded_rows = np.zeros(n_frames, dtype=np.intp)
    cdef double **full_ptrs = <double**> (<void*> &full_rows[0])
    cdef double **coded_ptrs = <double**> (<void*> &coded_rows[0])
    cdef np.intp_t row
    for row in range(n_frames):
        full_ptrs[row] = &full_view[row, 0]
        coded_ptrs[row] = &coded_view[row, 0]

    CodeSpectralEnvelope(full_ptrs, n_frames, fs, fft_size,
      number_of_dimensions, coded_ptrs)

    return np.array(coded_view, dtype=np.float64)
593 | 
def decode_spectral_envelope(np.ndarray[double, ndim=2, mode="c"] coded_spectral_envelope,
                             fs, fft_size):
    """Expand a coded spectral envelope back to full dimensionality.

    Parameters
    ----------
    coded_spectral_envelope : ndarray
        Coded spectral envelope.
    fs : int
        Sample rate of input signal in Hz.
    fft_size : int
        FFT size corresponding to the full dimensional spectral envelope.

    Returns
    -------
    spectrogram : ndarray
        Spectral envelope.
    """
    cdef int n_frames = <int>len(coded_spectral_envelope)
    # The code width is taken from the first frame of the input.
    cdef int n_dims = <int>len(coded_spectral_envelope[0])
    cdef double[:, ::1] coded_view = coded_spectral_envelope
    cdef double[:, ::1] full_view = np.zeros((n_frames, fft_size//2 + 1),
                                             dtype=np.float64)
    # Integer arrays serve as backing storage for the row-pointer tables
    # handed to the C API; this relies on np.intp being pointer-sized.
    cdef np.intp_t[:] coded_rows = np.zeros(n_frames, dtype=np.intp)
    cdef np.intp_t[:] full_rows = np.zeros(n_frames, dtype=np.intp)
    cdef double **coded_ptrs = <double**> (<void*> &coded_rows[0])
    cdef double **full_ptrs = <double**> (<void*> &full_rows[0])
    cdef np.intp_t row
    for row in range(n_frames):
        coded_ptrs[row] = &coded_view[row, 0]
        full_ptrs[row] = &full_view[row, 0]

    DecodeSpectralEnvelope(coded_ptrs, n_frames, fs, fft_size,
      n_dims, full_ptrs)

    return np.array(full_view, dtype=np.float64)
630 | 
def wav2world(x, fs, fft_size=None, frame_period=default_frame_period):
    """Run the complete WORLD analysis chain in one call.

    Only `fft_size` and `frame_period` can be configured here; every other
    parameter keeps its default. F0 estimation is fixed to DIO followed by
    StoneMask refinement.

    Parameters
    ----------
    x : ndarray
        Input waveform signal.
    fs : int
        Sample rate of input signal in Hz.
    fft_size : int
        Length of Fast Fourier Transform (in number of samples).
        The resulting dimension of `ap` and `sp` will be `fft_size` // 2 + 1.
        Default: None (forwarded as-is to `cheaptrick` and `d4c`)
    frame_period : float
        Period between consecutive frames in milliseconds.
        Default: 5.0

    Returns
    -------
    f0 : ndarray
        F0 contour.
    sp : ndarray
        Spectral envelope.
    ap : ndarray
        Aperiodicity.
    """
    # Coarse F0 from DIO, then refined by StoneMask on the same frame grid.
    coarse_f0, positions = dio(x, fs, frame_period=frame_period)
    f0 = stonemask(x, coarse_f0, positions, fs)
    # Both envelopes are estimated on the refined contour with the same FFT size.
    sp = cheaptrick(x, f0, positions, fs, fft_size=fft_size)
    ap = d4c(x, f0, positions, fs, fft_size=fft_size)
    return f0, sp, ap
665 | 


--------------------------------------------------------------------------------