├── requirements.txt
├── .gitmodules
├── demo
├── utterance
│ └── vaiueo2d.wav
└── demo.py
├── MANIFEST.in
├── pyproject.toml
├── pyworld
├── __init__.py
├── pyworld.pyi
└── pyworld.pyx
├── LICENSE
├── setup.py
├── .github
└── workflows
│ └── build-wheels.yml
├── README.md
└── .gitignore
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | matplotlib
3 | argparse; python_version<"3.5"
4 | soundfile
5 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "lib/World"]
2 | path = lib/World
3 | url = https://github.com/mmorise/World
4 |
--------------------------------------------------------------------------------
/demo/utterance/vaiueo2d.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tsukumijima/pyworld-prebuilt/master/demo/utterance/vaiueo2d.wav
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md LICENSE
2 | recursive-include lib *.cpp *.h LICENSE.txt
3 | recursive-include pyworld *.cpp *.pxd *.pyx
4 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "setuptools",
4 | "numpy>=2; python_version>='3.9'",
5 | "oldest-supported-numpy; python_version<'3.9'",
6 | "cython>=3.0",
7 | ]
8 | build-backend = "setuptools.build_meta"
9 |
--------------------------------------------------------------------------------
/pyworld/__init__.py:
--------------------------------------------------------------------------------
1 | """PyWorld is a Python wrapper for WORLD vocoder.
2 |
3 | PyWorld wrappers WORLD, which is a free software for high-quality speech
4 | analysis, manipulation and synthesis. It can estimate fundamental frequency (F0),
5 | aperiodicity and spectral envelope and also generate the speech like input speech
6 | with only estimated parameters.
7 |
8 | For more information, see https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder
9 | """
10 |
11 | from __future__ import division, print_function, absolute_import
12 | import sys
13 |
14 | if sys.version_info >= (3, 8):
15 | from importlib.metadata import version
16 | else:
17 | from importlib_metadata import version
18 |
19 | __version__ = version('pyworld-prebuilt')
20 |
21 | from .pyworld import *
22 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright 2016 pyworld contributors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, print_function, with_statement
2 |
3 | import sys
4 | from glob import glob
5 | from os.path import join
6 |
7 | import numpy
8 | from setuptools import Extension, find_packages, setup
9 | from setuptools.command.build_ext import build_ext
10 |
11 |
_VERSION = '0.3.5-post2'


# The WORLD C++ sources come from the git submodule checked out at lib/World.
world_src_top = join("lib", "World", "src")
world_sources = glob(join(world_src_top, "*.cpp"))

# One extension module: the Cython wrapper compiled together with WORLD itself.
ext_modules = [
    Extension(
        name="pyworld.pyworld",
        include_dirs=[world_src_top, numpy.get_include()],
        sources=[join("pyworld", "pyworld.pyx")] + world_sources,
        language="c++")]

# Python 2's built-in open() has no `encoding` parameter.
kwargs = {"encoding": "utf-8"} if sys.version_info[0] > 2 else {}
# Read the README through a context manager so the file handle is closed.
with open("README.md", "r", **kwargs) as readme_file:
    long_description = readme_file.read()

setup(
    name="pyworld-prebuilt",
    description="PyWorld: a Python wrapper for WORLD vocoder",
    long_description=long_description,
    long_description_content_type="text/markdown",
    ext_modules=ext_modules,
    cmdclass={'build_ext': build_ext},
    version=_VERSION,
    packages=find_packages(),
    # Ship the type stubs (pyworld/pyworld.pyi) with the package; only .py
    # files are picked up by default.
    package_data={'pyworld': ['*.pyi']},
    install_requires=[
        'numpy',
        # pyworld/__init__.py imports this backport on interpreters that lack
        # importlib.metadata.
        'importlib_metadata; python_version < "3.8"',
    ],
    extras_require={
        'test': ['nose'],
        'sdist': ['numpy', 'cython>=3.0'],
    },
    author="Pyworld Contributors",
    author_email="jeremycchsu@gmail.com",
    url="https://github.com/tsukumijima/pyworld-prebuilt",
    keywords=['vocoder'],
    classifiers=[],
)
46 |
--------------------------------------------------------------------------------
/pyworld/pyworld.pyi:
--------------------------------------------------------------------------------
from numpy.typing import NDArray
from typing import Tuple


# Module-level default settings defined by the compiled module next to the
# functions below (frame period in milliseconds, F0 bounds in Hz).
default_frame_period: float
default_f0_floor: float
default_f0_ceil: float


# F0 estimators; each returns the pair (f0, temporal_positions).
def dio(x: NDArray, fs: int, f0_floor: float = ..., f0_ceil: float = ..., channels_in_octave: float = ..., frame_period: float = ..., speed: int = ..., allowed_range: float = ...) -> Tuple[NDArray, NDArray]: ...

def harvest(x: NDArray, fs: int, f0_floor: float = ..., f0_ceil: float = ..., frame_period: float = ...) -> Tuple[NDArray, NDArray]: ...

def stonemask(x: NDArray, f0: NDArray, temporal_positions: NDArray, fs: int) -> NDArray: ...

def get_cheaptrick_fft_size(fs: int, f0_floor: float = ...) -> int: ...

def get_cheaptrick_f0_floor(fs: int, fft_size: int) -> float: ...

def cheaptrick(x: NDArray, f0: NDArray, temporal_positions: NDArray, fs: int, q1: float = ..., f0_floor: float = ..., fft_size: int = ...) -> NDArray: ...

def d4c(x: NDArray, f0: NDArray, temporal_positions: NDArray, fs: int, threshold: float = ..., fft_size: int = ...) -> NDArray: ...

def synthesize(f0: NDArray, spectrogram: NDArray, aperiodicity: NDArray, fs: int, frame_period: float = ...) -> NDArray: ...

def get_num_aperiodicities(fs: int) -> int: ...

def code_aperiodicity(aperiodicity: NDArray, fs: int) -> NDArray: ...

def decode_aperiodicity(coded_aperiodicity: NDArray, fs: int, fft_size: int) -> NDArray: ...

def code_spectral_envelope(spectrogram: NDArray, fs: int, number_of_dimensions: int) -> NDArray: ...

def decode_spectral_envelope(coded_spectral_envelope: NDArray, fs: int, fft_size: int) -> NDArray: ...

def wav2world(x: NDArray, fs: int, fft_size: int = ..., frame_period: float = ...) -> Tuple[NDArray, NDArray, NDArray]: ...
32 |
--------------------------------------------------------------------------------
/.github/workflows/build-wheels.yml:
--------------------------------------------------------------------------------
1 | name: Build and Publish Wheels
2 |
3 | on:
4 | push:
5 | workflow_dispatch:
6 |
7 | permissions:
8 | contents: read
9 | id-token: write
10 |
11 | jobs:
12 | build-wheels:
13 | name: Build Wheels on ${{ matrix.os }}
14 | strategy:
15 | matrix:
16 | os: [windows-2022, macos-15, ubuntu-24.04, ubuntu-24.04-arm]
17 | include:
18 | - os: windows-2022
19 | archs: [AMD64, x86, ARM64]
20 | - os: macos-15
21 | archs: [x86_64, arm64, universal2]
22 | - os: ubuntu-24.04
23 | archs: [x86_64]
24 | - os: ubuntu-24.04-arm
25 | archs: [aarch64]
26 | runs-on: ${{ matrix.os }}
27 | steps:
28 | - uses: actions/checkout@v4
29 | with:
30 | submodules: true
31 |
32 | - name: Set up Python
33 | uses: actions/setup-python@v5
34 | with:
35 | python-version: '3.x'
36 | cache: 'pip'
37 |
38 | - name: Setup cibuildwheel cache
39 | uses: actions/cache@v4
40 | with:
41 | path: |
42 | ~/.cache/pip
43 | ~/.cache/cibuildwheel
44 | ~/Library/Caches/pip
45 | ~/AppData/Local/pip/Cache
46 | key: ${{ runner.os }}-cibuildwheel-${{ hashFiles('**/pyproject.toml', '**/setup.py') }}
47 | restore-keys: |
48 | ${{ runner.os }}-cibuildwheel-
49 |
50 | - name: Build Wheels
51 | uses: pypa/cibuildwheel@main
52 | env:
53 | CIBW_ARCHS_LINUX: ${{ contains(matrix.os, 'ubuntu') && join(matrix.archs, ' ') || 'x86_64' }}
54 | CIBW_ARCHS_WINDOWS: ${{ contains(matrix.os, 'windows') && join(matrix.archs, ' ') || 'AMD64' }}
55 | CIBW_ARCHS_MACOS: ${{ (contains(matrix.os, 'macos') && join(matrix.archs, ' ')) || 'x86_64' }}
56 | CIBW_TEST_SKIP: "*-win_arm64"
57 | CIBW_SKIP: pp* *-musllinux*
58 | CIBW_PROJECT_REQUIRES_PYTHON: ">=3.9"
59 |
60 | - uses: actions/upload-artifact@v4
61 | with:
62 | name: wheels-${{ matrix.os }}-${{ join(matrix.archs, '-') }}
63 | path: ./wheelhouse
64 | retention-days: 5
65 |
66 | build-sdist:
67 | name: Build source distribution
68 | runs-on: ubuntu-latest
69 | steps:
70 | - uses: actions/checkout@v4
71 | with:
72 | submodules: true
73 |
74 | - name: Build sdist
75 | run: pipx run build --sdist
76 |
77 | - uses: actions/upload-artifact@v4
78 | with:
79 | name: wheels-sdist
80 | path: dist/*.tar.gz
81 | retention-days: 5
82 |
83 | publish-wheels:
84 | name: Publish Wheels
85 | needs: [build-wheels, build-sdist]
86 | runs-on: ubuntu-latest
87 | steps:
88 | - uses: actions/download-artifact@v4
89 | with:
90 | pattern: wheels-*
91 | merge-multiple: true
92 | path: ./wheelhouse
93 |
94 | - name: Publish to PyPI
95 | uses: pypa/gh-action-pypi-publish@release/v1
96 | if: ${{ startsWith(github.ref, 'refs/tags') }}
97 | with:
98 | packages-dir: ./wheelhouse
99 | skip-existing: true
100 | password: ${{ secrets.PYPI_API_TOKEN }}
101 |
--------------------------------------------------------------------------------
/demo/demo.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function
2 |
3 | import os
4 | from shutil import rmtree
5 | import argparse
6 |
7 | import numpy as np
8 |
9 | import matplotlib # Remove this line if you don't need them
10 | matplotlib.use('Agg') # Remove this line if you don't need them
11 | import matplotlib.pyplot as plt
12 |
13 | import soundfile as sf
14 | # import librosa
15 | import pyworld as pw
16 |
17 |
# Command-line options. main() forwards `frame_period` to pw.dio() and
# pw.synthesize(), and `speed` to pw.dio().
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--frame_period", type=float, default=5.0)
parser.add_argument("-s", "--speed", type=int, default=1)


# Small offset that savefig() adds to 2-D arrays before plotting them
# (with or without the logarithm).
EPSILON = 1e-8
24 |
def savefig(filename, figlist, log=True):
    """Draw the arrays in `figlist` as vertically stacked subplots and save them.

    The dimensionality of the first array selects the plot type for the whole
    figure: 1-D arrays are drawn as curves, 2-D arrays as images.

    Parameters
    ----------
    filename : str
        Path of the image file to write.
    figlist : list of ndarray
        Arrays to plot, one subplot per entry.
    log : bool
        For 2-D input, plot ``log(f + EPSILON)`` when true and
        ``f + EPSILON`` otherwise. Ignored for 1-D input.

    Raises
    ------
    ValueError
        If the first array is neither 1-D nor 2-D.
    """
    n = len(figlist)
    # Peek at the first entry to decide how the whole list is rendered.
    ndim = len(figlist[0].shape)
    if ndim not in (1, 2):
        raise ValueError('Input dimension must < 3.')

    fig = plt.figure()
    try:
        for i, f in enumerate(figlist):
            plt.subplot(n, 1, i+1)
            if ndim == 1:
                # Entries that are not 1-D only get an empty subplot.
                if len(f.shape) == 1:
                    plt.plot(f)
                    plt.xlim([0, len(f)])
            else:
                x = np.log(f + EPSILON) if log else f + EPSILON
                plt.imshow(x.T, origin='lower', interpolation='none', aspect='auto', extent=(0, x.shape[0], 0, x.shape[1]))
        plt.savefig(filename)
    finally:
        # Release the figure; otherwise every call leaves one more open figure.
        plt.close(fig)
53 |
54 |
def main(args):
    """Analyse the demo utterance with WORLD, resynthesize it and write the results to ``test/``.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options; ``frame_period`` and ``speed`` are read.
    """
    # Start every run from an empty output directory.
    if os.path.isdir('test'):
        rmtree('test')
    os.mkdir('test')

    x, fs = sf.read('utterance/vaiueo2d.wav')

    # 1. A convenient way: a single analysis call with default options.
    #    (These results are replaced by the step-by-step ones from 2-2 below.)
    f0, sp, ap = pw.wav2world(x, fs)
    y = pw.synthesize(f0, sp, ap, fs, pw.default_frame_period)

    # 2. Step by step
    period = args.frame_period

    # 2-1 DIO without F0 refinement
    raw_f0, times = pw.dio(
        x, fs,
        f0_floor=50.0, f0_ceil=600.0,
        channels_in_octave=2,
        frame_period=period,
        speed=args.speed,
    )
    raw_sp = pw.cheaptrick(x, raw_f0, times, fs)
    raw_ap = pw.d4c(x, raw_f0, times, fs)
    raw_y = pw.synthesize(raw_f0, raw_sp, raw_ap, fs, period)
    sf.write('test/y_without_f0_refinement.wav', raw_y, fs)

    # 2-2 DIO with F0 refinement (using Stonemask)
    f0 = pw.stonemask(x, raw_f0, times, fs)
    sp = pw.cheaptrick(x, f0, times, fs)
    ap = pw.d4c(x, f0, times, fs)
    y = pw.synthesize(f0, sp, ap, fs, period)
    sf.write('test/y_with_f0_refinement.wav', y, fs)

    # 2-3 Harvest with F0 refinement (using Stonemask)
    harvest_raw_f0, harvest_times = pw.harvest(x, fs)
    harvest_f0 = pw.stonemask(x, harvest_raw_f0, harvest_times, fs)
    harvest_sp = pw.cheaptrick(x, harvest_f0, harvest_times, fs)
    harvest_ap = pw.d4c(x, harvest_f0, harvest_times, fs)
    harvest_y = pw.synthesize(harvest_f0, harvest_sp, harvest_ap, fs,
                              pw.default_frame_period)
    sf.write('test/y_harvest_with_f0_refinement.wav', harvest_y, fs)

    # Comparison plots: input vs. the two DIO-based results.
    savefig('test/wavform.png', [x, raw_y, y])
    savefig('test/sp.png', [raw_sp, sp])
    savefig('test/ap.png', [raw_ap, ap], log=False)
    savefig('test/f0.png', [raw_f0, f0])

    print('Please check "test" directory for output files')
103 |
104 |
# Script entry point: parse the command-line options and run the demo.
if __name__ == '__main__':
    args = parser.parse_args()
    main(args)
108 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pyworld-prebuilt - A Python wrapper of WORLD Vocoder
2 |
3 | > [!TIP]
4 | > This is a prebuilt version of [pyworld](https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder) for multiple platforms.
5 | > There are no differences from pyworld except for some migration from some deprecated APIs and the addition of type hints.
6 |
7 | WORLD Vocoder is a fast and high-quality vocoder
8 | which parameterizes speech into three components:
9 |
10 | 1. `f0`: Pitch contour
11 | 2. `sp`: Harmonic spectral envelope
12 | 3. `ap`: Aperiodic spectral envelope (relative to the harmonic spectral envelope)
13 |
14 | It can also (re)synthesize speech using these features (see examples below).
15 |
16 | For more information, please visit Dr. Morise's [WORLD repository](https://github.com/mmorise/World)
17 | and the [official website of WORLD Vocoder](http://ml.cs.yamanashi.ac.jp/world/english)
18 |
19 |
20 | ## APIs
21 |
22 | ### Vocoder Functions
23 | ```python
24 | import pyworld as pw
25 | _f0, t = pw.dio(x, fs) # raw pitch extractor
26 | f0 = pw.stonemask(x, _f0, t, fs) # pitch refinement
27 | sp = pw.cheaptrick(x, f0, t, fs) # extract smoothed spectrogram
28 | ap = pw.d4c(x, f0, t, fs) # extract aperiodicity
29 |
30 | y = pw.synthesize(f0, sp, ap, fs) # synthesize an utterance using the parameters
31 | ```
32 |
33 |
34 | ### Utility
35 | ```python
36 | # Convert speech into features (using default arguments)
37 | f0, sp, ap = pw.wav2world(x, fs)
38 | ```
39 |
40 |
41 | You can change the default arguments of the function, too.
42 | See more info using `help`.
43 |
44 |
45 | ## Installation
46 |
47 | ### Using pip
48 | `pip install pyworld-prebuilt`
49 |
50 |
51 | ### Building from Source
52 | ```bash
53 | git clone https://github.com/tsukumijima/pyworld-prebuilt.git
54 | cd pyworld-prebuilt
55 | git submodule update --init
56 | pip install -U pip
57 | pip install -r requirements.txt
58 | pip install .
59 | ```
60 | It will automatically `git clone` Morise's World Vocoder (C++ version).
61 | (It seems to me that using `virtualenv` or `conda` is the best practice.)
62 |
63 |
64 | ### Installation Validation
65 | You can validate installation by running
66 | ```bash
67 | cd demo
68 | python demo.py
69 | ```
70 | to see if you get results in `test/` directory.
71 | (Please avoid writing and executing codes in the `pyworld-prebuilt` folder for now.)
72 |
73 | ### Environment/Dependencies
74 | - Operating systems
75 | - Linux Ubuntu 14.04+
76 | - Windows (thanks to [wuaalb](https://github.com/wuaalb))
77 | - WSL
78 | - Python
79 | - 3.7+
80 |
81 | You can install these dependencies with `pip install -r requirements.txt`
82 |
83 |
84 |
85 | ## Notice
86 | - WORLD vocoder is designed for speech sampled ≥ 16 kHz.
87 | Applying WORLD to 8 kHz speech will fail.
88 | See a possible workaround [here](https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/issues/54).
89 | - When the SNR is low, extracting pitch using `harvest` instead of `dio`
90 | is a better option.
91 |
92 |
93 | ## Troubleshooting
94 | 1. Upgrade your Cython version to 3.0 or later (the build now requires `cython>=3.0`).
95 | (I failed to build it on Cython 0.20.1post0)
96 | It'll require you to download Cython from http://cython.org/
97 | Unzip it, and `python setup.py install` it.
98 | (I tried `pip install Cython` but the upgrade didn't seem correct)
99 | (Again, add `--user` if you don't have root access.)
100 | 2. Upon executing `demo/demo.py`, the following code might be needed in some environments (e.g. when you're working on a remote Linux server):
101 |
102 | ```python
103 | import matplotlib
104 | matplotlib.use('Agg')
105 | ```
106 | 3. If you encounter `library not found: sndfile` error upon executing `demo.py`,
107 | you might have to install it by `apt-get install libsndfile1`.
108 | You can also replace `pysoundfile` with `scipy` or `librosa`, but some modification is needed:
109 | - librosa:
110 | - load(filename, dtype=np.float64)
111 | - output.write_wav(filename, wav, fs)
112 | - remember to pass `dtype` argument to ensure that the method gives you a `double`.
113 | - scipy:
114 | - You'll have to write a customized utility function based on the following methods
115 | - scipy.io.wavfile.read (but this gives you `short`)
116 | - scipy.io.wavfile.write
117 |
118 | 4. If you have installation issue on Windows, I probably could not provide
119 | much help because my development environment is Ubuntu
120 | and Windows Subsystem for Linux ([read this if you are interested in installing it](https://github.com/JeremyCCHsu/wsl)).
121 |
122 |
123 | ### Other Installation Suggestions
124 | 1. Using `pip install .` is safer, and you can easily uninstall pyworld by `pip uninstall pyworld-prebuilt`
125 | - For Mac users: You might need to do `MACOSX_DEPLOYMENT_TARGET=10.9 pip install .` See [issue](https://github.com/SeanNaren/warp-ctc/issues/129#issuecomment-502349652).
126 | 2. Another way to install pyworld is via
127 | `python setup.py install`
128 | - Add `--user` if you don't have root access
129 | - Add `--record install.txt` to track the installation dir
130 | 3. If you just want to try out some experiments, execute
131 | `python setup.py build_ext --inplace`
132 | Then you can use PyWorld from this directory.
133 | You can also copy the resulting **pyworld.so** (pyworld.{arch}.pyd on Windows) file to
134 | `~/.local/lib/python2.7/site-packages` (or corresponding Windows directory)
135 | so that you can use it everywhere like an installed package.
136 | Alternatively you can copy/symlink the compiled files using pip, e.g. `pip install -e .`
137 |
138 |
139 |
140 | ## Acknowledgement
141 | Thank all contributors ([tats-u](https://github.com/tats-u), [wuaalb](https://github.com/wuaalb), [r9y9](https://github.com/r9y9), [rikrd](https://github.com/rikrd), [kundan2510](https://github.com/kundan2510)) for making this repo better and [sotelo](https://github.com/sotelo) whose [world.py](https://github.com/sotelo/world.py) inspired this repo.
142 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | test/
2 | *.cpp
3 | *.pyd
4 | .pypirc
5 | .vscode
6 |
7 | # Created by https://www.gitignore.io
8 |
9 | ### Python ###
10 | # Byte-compiled / optimized / DLL files
11 | __pycache__/
12 | *.py[cod]
13 |
14 | # C extensions
15 | *.so
16 |
17 | # Distribution / packaging
18 | .Python
19 | env/
20 | build/
21 | develop-eggs/
22 | dist/
23 | downloads/
24 | eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .coverage
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 |
59 | # Sphinx documentation
60 | docs/_build/
61 |
62 | # PyBuilder
63 | target/
64 |
65 |
66 | ### IPythonNotebook ###
67 | # Temporary data
68 | .ipynb_checkpoints/
69 |
70 |
71 | ### SublimeText ###
72 | # cache files for sublime text
73 | *.tmlanguage.cache
74 | *.tmPreferences.cache
75 | *.stTheme.cache
76 |
77 | # workspace files are user-specific
78 | *.sublime-workspace
79 |
80 | # project files should be checked into the repository, unless a significant
81 | # proportion of contributors will probably not be using SublimeText
82 | # *.sublime-project
83 |
84 | # sftp configuration file
85 | sftp-config.json
86 |
87 |
88 | ### Emacs ###
89 | # -*- mode: gitignore; -*-
90 | *~
91 | \#*\#
92 | /.emacs.desktop
93 | /.emacs.desktop.lock
94 | *.elc
95 | auto-save-list
96 | tramp
97 | .\#*
98 |
99 | # Org-mode
100 | .org-id-locations
101 | *_archive
102 |
103 | # flymake-mode
104 | *_flymake.*
105 |
106 | # eshell files
107 | /eshell/history
108 | /eshell/lastdir
109 |
110 | # elpa packages
111 | /elpa/
112 |
113 | # reftex files
114 | *.rel
115 |
116 | # AUCTeX auto folder
117 | /auto/
118 |
119 | # cask packages
120 | .cask/
121 |
122 |
123 | ### Vim ###
124 | [._]*.s[a-w][a-z]
125 | [._]s[a-w][a-z]
126 | *.un~
127 | Session.vim
128 | .netrwhist
129 | *~
130 |
131 |
132 | ### C++ ###
133 | # Compiled Object files
134 | *.slo
135 | *.lo
136 | *.o
137 | *.obj
138 |
139 | # Precompiled Headers
140 | *.gch
141 | *.pch
142 |
143 | # Compiled Dynamic libraries
144 | *.so
145 | *.dylib
146 | *.dll
147 |
148 | # Fortran module files
149 | *.mod
150 |
151 | # Compiled Static libraries
152 | *.lai
153 | *.la
154 | *.a
155 | *.lib
156 |
157 | # Executables
158 | *.exe
159 | *.out
160 | *.app
161 |
162 |
163 | ### OSX ###
164 | .DS_Store
165 | .AppleDouble
166 | .LSOverride
167 |
168 | # Icon must end with two \r
169 | Icon
170 |
171 |
172 | # Thumbnails
173 | ._*
174 |
175 | # Files that might appear on external disk
176 | .Spotlight-V100
177 | .Trashes
178 |
179 | # Directories potentially created on remote AFP share
180 | .AppleDB
181 | .AppleDesktop
182 | Network Trash Folder
183 | Temporary Items
184 | .apdisk
185 |
186 |
187 | ### Linux ###
188 | *~
189 |
190 | # KDE directory preferences
191 | .directory
192 |
193 | # Linux trash folder which might appear on any partition or disk
194 | .Trash-*
195 | test/
196 | *.cpp
197 | *.pyd
198 | .pypirc
199 | .vscode
200 |
201 | # Created by https://www.gitignore.io
202 |
203 | ### Python ###
204 | # Byte-compiled / optimized / DLL files
205 | __pycache__/
206 | *.py[cod]
207 |
208 | # C extensions
209 | *.so
210 |
211 | # Distribution / packaging
212 | .Python
213 | env/
214 | build/
215 | develop-eggs/
216 | dist/
217 | downloads/
218 | eggs/
219 | lib/
220 | lib64/
221 | parts/
222 | sdist/
223 | var/
224 | *.egg-info/
225 | .installed.cfg
226 | *.egg
227 |
228 | # PyInstaller
229 | # Usually these files are written by a python script from a template
230 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
231 | *.manifest
232 | *.spec
233 |
234 | # Installer logs
235 | pip-log.txt
236 | pip-delete-this-directory.txt
237 |
238 | # Unit test / coverage reports
239 | htmlcov/
240 | .tox/
241 | .coverage
242 | .cache
243 | nosetests.xml
244 | coverage.xml
245 |
246 | # Translations
247 | *.mo
248 | *.pot
249 |
250 | # Django stuff:
251 | *.log
252 |
253 | # Sphinx documentation
254 | docs/_build/
255 |
256 | # PyBuilder
257 | target/
258 |
259 |
260 | ### IPythonNotebook ###
261 | # Temporary data
262 | .ipynb_checkpoints/
263 |
264 |
265 | ### SublimeText ###
266 | # cache files for sublime text
267 | *.tmlanguage.cache
268 | *.tmPreferences.cache
269 | *.stTheme.cache
270 |
271 | # workspace files are user-specific
272 | *.sublime-workspace
273 |
274 | # project files should be checked into the repository, unless a significant
275 | # proportion of contributors will probably not be using SublimeText
276 | # *.sublime-project
277 |
278 | # sftp configuration file
279 | sftp-config.json
280 |
281 |
282 | ### Emacs ###
283 | # -*- mode: gitignore; -*-
284 | *~
285 | \#*\#
286 | /.emacs.desktop
287 | /.emacs.desktop.lock
288 | *.elc
289 | auto-save-list
290 | tramp
291 | .\#*
292 |
293 | # Org-mode
294 | .org-id-locations
295 | *_archive
296 |
297 | # flymake-mode
298 | *_flymake.*
299 |
300 | # eshell files
301 | /eshell/history
302 | /eshell/lastdir
303 |
304 | # elpa packages
305 | /elpa/
306 |
307 | # reftex files
308 | *.rel
309 |
310 | # AUCTeX auto folder
311 | /auto/
312 |
313 | # cask packages
314 | .cask/
315 |
316 |
317 | ### Vim ###
318 | [._]*.s[a-w][a-z]
319 | [._]s[a-w][a-z]
320 | *.un~
321 | Session.vim
322 | .netrwhist
323 | *~
324 |
325 |
326 | ### C++ ###
327 | # Compiled Object files
328 | *.slo
329 | *.lo
330 | *.o
331 | *.obj
332 |
333 | # Precompiled Headers
334 | *.gch
335 | *.pch
336 |
337 | # Compiled Dynamic libraries
338 | *.so
339 | *.dylib
340 | *.dll
341 |
342 | # Fortran module files
343 | *.mod
344 |
345 | # Compiled Static libraries
346 | *.lai
347 | *.la
348 | *.a
349 | *.lib
350 |
351 | # Executables
352 | *.exe
353 | *.out
354 | *.app
355 |
356 |
357 | ### OSX ###
358 | .DS_Store
359 | .AppleDouble
360 | .LSOverride
361 |
362 | # Icon must end with two \r
363 | Icon
364 |
365 |
366 | # Thumbnails
367 | ._*
368 |
369 | # Files that might appear on external disk
370 | .Spotlight-V100
371 | .Trashes
372 |
373 | # Directories potentially created on remote AFP share
374 | .AppleDB
375 | .AppleDesktop
376 | Network Trash Folder
377 | Temporary Items
378 | .apdisk
379 |
380 |
381 | ### Linux ###
382 | *~
383 |
384 | # KDE directory preferences
385 | .directory
386 |
387 | # Linux trash folder which might appear on any partition or disk
388 | .Trash-*
389 |
--------------------------------------------------------------------------------
/pyworld/pyworld.pyx:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import cython
3 |
4 | import numpy as np
5 | cimport numpy as np
6 | np.import_array()
7 |
8 |
# WORLD synthesis routine, declared from its C++ header so it can be called here.
# `except +` makes Cython turn a C++ exception into a Python exception;
# `nogil` marks the function as callable without holding the GIL.
cdef extern from "world/synthesis.h":
    void Synthesis(const double *f0,
        int f0_length, const double * const *spectrogram,
        const double * const *aperiodicity,
        int fft_size, double frame_period,
        int fs, int y_length, double *y) except + nogil
15 |
16 |
# CheapTrick declarations: the option struct, its initializer, two helpers
# relating FFT size and F0 floor, and the analysis routine itself, which
# writes its result through `spectrogram`. Only CheapTrick is declared `nogil`.
cdef extern from "world/cheaptrick.h":
    ctypedef struct CheapTrickOption:
        double q1
        double f0_floor
        int fft_size

    int GetFFTSizeForCheapTrick(int fs, const CheapTrickOption *option) except +
    double GetF0FloorForCheapTrick(int fs, int fft_size) except +
    void InitializeCheapTrickOption(int fs, CheapTrickOption *option) except +
    void CheapTrick(const double *x, int x_length, int fs, const double *temporal_positions,
        const double *f0, int f0_length, const CheapTrickOption *option,
        double **spectrogram) except + nogil
29 |
30 |
# DIO declarations used by dio(): the option struct, its initializer, the
# helper that reports how many frames to allocate, and the estimator, which
# fills the caller-provided `temporal_positions` and `f0` buffers.
# GetSamplesForDIO is declared without `except +`.
cdef extern from "world/dio.h":
    ctypedef struct DioOption:
        double f0_floor
        double f0_ceil
        double channels_in_octave
        double frame_period
        int speed
        double allowed_range

    void InitializeDioOption(DioOption *option) except +
    int GetSamplesForDIO(int fs, int x_length, double frame_period)
    void Dio(const double *x, int x_length, int fs, const DioOption *option,
        double *temporal_positions, double *f0) except + nogil
44 |
45 |
# Harvest declarations used by harvest(): the option struct, its initializer,
# the frame-count helper and the estimator. Only Harvest itself carries
# `except +` (C++ exceptions become Python exceptions) and `nogil`.
cdef extern from "world/harvest.h":
    ctypedef struct HarvestOption:
        double f0_floor
        double f0_ceil
        double frame_period

    void InitializeHarvestOption(HarvestOption *option)
    int GetSamplesForHarvest(int fs, int x_length, double frame_period)
    void Harvest(const double *x, int x_length, int fs, const HarvestOption *option,
        double *temporal_positions, double *f0) except + nogil
56 |
57 |
# D4C declarations: the option struct (a single `threshold` field), its
# initializer, and the analysis routine, which writes its result through
# `aperiodicity` and is callable without the GIL.
cdef extern from "world/d4c.h":
    ctypedef struct D4COption:
        double threshold

    void InitializeD4COption(D4COption *option) except +
    void D4C(const double *x, int x_length, int fs, const double *temporal_positions,
        const double *f0, int f0_length, int fft_size, const D4COption *option,
        double **aperiodicity) except + nogil
66 |
67 |
# StoneMask declaration: takes the waveform plus an existing F0 track with its
# frame positions and writes to `refined_f0`. Callable without the GIL;
# C++ exceptions are converted to Python exceptions.
cdef extern from "world/stonemask.h":
    void StoneMask(const double *x, int x_length, int fs,
        const double *temporal_positions, const double *f0, int f0_length,
        double *refined_f0) except + nogil
72 |
73 |
# Codec declarations: coding/decoding of aperiodicity and of the spectral
# envelope. Each routine reads a `const double * const *` input and writes
# through a `double **` output. None of them is declared `nogil`, and
# GetNumberOfAperiodicities is declared without `except +`.
cdef extern from "world/codec.h":
    int GetNumberOfAperiodicities(int fs)
    void CodeAperiodicity(const double * const *aperiodicity, int f0_length,
        int fs, int fft_size, double **coded_aperiodicity) except +
    void DecodeAperiodicity(const double * const *coded_aperiodicity,
        int f0_length, int fs, int fft_size, double **aperiodicity) except +
    void CodeSpectralEnvelope(const double * const *spectrogram, int f0_length,
        int fs, int fft_size, int number_of_dimensions,
        double **coded_spectral_envelope) except +
    void DecodeSpectralEnvelope(const double * const *coded_spectral_envelope,
        int f0_length, int fs, int fft_size, int number_of_dimensions,
        double **spectrogram) except +
86 |
87 |
# Defaults used in the signatures of dio() and harvest() below.
default_frame_period = 5.0  # period between consecutive frames, in milliseconds
default_f0_floor = 71.0  # lower F0 limit, in Hz
default_f0_ceil = 800.0  # upper F0 limit, in Hz
91 |
92 |
def dio(np.ndarray[double, ndim=1, mode="c"] x not None, int fs,
        f0_floor=default_f0_floor, f0_ceil=default_f0_ceil,
        channels_in_octave=2.0, frame_period=default_frame_period,
        speed=1, allowed_range=0.1):
    """Estimate the F0 contour of a waveform with the DIO algorithm.

    Parameters
    ----------
    x : ndarray
        Input waveform signal (1-D, C-contiguous array of doubles).
    fs : int
        Sample rate of input signal in Hz.
    f0_floor : float
        Lower F0 limit in Hz.
        Default: 71.0
    f0_ceil : float
        Upper F0 limit in Hz.
        Default: 800.0
    channels_in_octave : float
        Resolution of multiband processing; normally shouldn't be changed.
        Default: 2.0
    frame_period : float
        Period between consecutive frames in milliseconds.
        Default: 5.0
    speed : int
        The F0 estimator may downsample the input signal using this integer factor
        (range [1;12]). The algorithm will then operate on a signal at fs/speed Hz
        to reduce computational complexity, but high values may negatively impact
        accuracy.
        Default: 1 (no downsampling)
    allowed_range : float
        Threshold for voiced/unvoiced decision. Can be any value >= 0, but 0.02 to 0.2
        is a reasonable range. Lower values will cause more frames to be considered
        unvoiced (in the extreme case of `allowed_range=0`, almost all frames will be
        unvoiced).
        Default: 0.1

    Returns
    -------
    f0 : ndarray
        Estimated F0 contour.
    temporal_positions : ndarray
        Temporal position of each frame.
    """
    cdef int n_samples = x.shape[0]

    # Start from WORLD's own defaults, then apply the caller's settings.
    cdef DioOption dio_option
    InitializeDioOption(&dio_option)
    dio_option.f0_floor = f0_floor
    dio_option.f0_ceil = f0_ceil
    dio_option.channels_in_octave = channels_in_octave
    dio_option.frame_period = frame_period
    dio_option.speed = speed
    dio_option.allowed_range = allowed_range

    # WORLD reports how many frames it will produce; both output buffers
    # are allocated with that length before the call.
    n_frames = GetSamplesForDIO(fs, n_samples, dio_option.frame_period)
    cdef np.ndarray[double, ndim=1, mode="c"] temporal_positions = \
        np.zeros(n_frames, dtype=np.float64)
    cdef np.ndarray[double, ndim=1, mode="c"] f0 = \
        np.zeros(n_frames, dtype=np.float64)

    # The estimator runs without the GIL and writes into the two buffers.
    with (nogil, cython.boundscheck(False)):
        Dio(&x[0], n_samples, fs, &dio_option, &temporal_positions[0], &f0[0])
    return f0, temporal_positions
153 |
154 |
def harvest(np.ndarray[double, ndim=1, mode="c"] x not None, int fs,
            f0_floor=default_f0_floor, f0_ceil=default_f0_ceil,
            frame_period=default_frame_period):
    """Estimate an F0 contour with the Harvest algorithm.

    Parameters
    ----------
    x : ndarray
        Input waveform signal (1-D, C-contiguous, float64).
    fs : int
        Sample rate of input signal in Hz.
    f0_floor : float
        Lower F0 limit in Hz.
        Default: 71.0
    f0_ceil : float
        Upper F0 limit in Hz.
        Default: 800.0
    frame_period : float
        Period between consecutive frames in milliseconds.
        Default: 5.0

    Returns
    -------
    f0 : ndarray
        Estimated F0 contour.
    temporal_positions : ndarray
        Temporal position of each frame.
    """
    # Start from the library defaults, then apply the caller's settings.
    cdef HarvestOption harvest_opt
    InitializeHarvestOption(&harvest_opt)
    harvest_opt.f0_floor = f0_floor
    harvest_opt.f0_ceil = f0_ceil
    harvest_opt.frame_period = frame_period

    # The library reports how many frames it will write for this input.
    cdef int n_samples = len(x)
    n_frames = GetSamplesForHarvest(fs, n_samples, harvest_opt.frame_period)

    cdef np.ndarray[double, ndim=1, mode="c"] f0 = \
        np.zeros(n_frames, dtype=np.float64)
    cdef np.ndarray[double, ndim=1, mode="c"] temporal_positions = \
        np.zeros(n_frames, dtype=np.float64)
    with (nogil, cython.boundscheck(False)):
        Harvest(&x[0], n_samples, fs, &harvest_opt,
                &temporal_positions[0], &f0[0])
    return f0, temporal_positions
197 |
198 |
def stonemask(np.ndarray[double, ndim=1, mode="c"] x not None,
              np.ndarray[double, ndim=1, mode="c"] f0 not None,
              np.ndarray[double, ndim=1, mode="c"] temporal_positions not None,
              int fs):
    """StoneMask F0 refinement algorithm.

    Parameters
    ----------
    x : ndarray
        Input waveform signal.
    f0 : ndarray
        Input F0 contour.
    temporal_positions : ndarray
        Temporal positions of each frame. Must contain at least as many
        entries as `f0`.
    fs : int
        Sample rate of input signal in Hz.

    Returns
    -------
    refined_f0 : ndarray
        Refined F0 contour, same length as `f0`.

    Raises
    ------
    ValueError
        If `temporal_positions` has fewer entries than `f0`.
    """
    # The C routine is handed len(f0) as the frame count for both buffers;
    # a shorter `temporal_positions` would let it run past the end, so it is
    # rejected up front.
    if temporal_positions.shape[0] < f0.shape[0]:
        raise ValueError('Mismatched number of frames between F0 ({:d}) '
                         'and temporal positions ({:d})'
                         .format(f0.shape[0], temporal_positions.shape[0]))

    cdef int x_length = len(x)
    cdef int f0_length = len(f0)
    cdef np.ndarray[double, ndim=1, mode="c"] refined_f0 = \
        np.zeros(f0_length, dtype=np.dtype('float64'))
    with (nogil, cython.boundscheck(False)):
        StoneMask(&x[0], x_length, fs, &temporal_positions[0],
                  &f0[0], f0_length, &refined_f0[0])
    return refined_f0
229 |
230 |
def get_cheaptrick_fft_size(int fs, f0_floor=default_f0_floor):
    """Return the FFT size CheapTrick needs for a given F0 floor.

    Parameters
    ----------
    fs : int
        Sample rate of input signal in Hz.
    f0_floor : float
        Lower F0 limit in Hz. The required FFT size is a direct
        consequence of the F0 floor used.
        Default: 71.0

    Returns
    -------
    fft_size : int
        Resulting FFT size.
    """
    # Only the F0 floor is filled in before querying the library.
    cdef CheapTrickOption ct_opt
    ct_opt.f0_floor = f0_floor
    return GetFFTSizeForCheapTrick(fs, &ct_opt)
252 |
def get_cheaptrick_f0_floor(int fs, int fft_size):
    """Return the effective lower F0 limit of CheapTrick.

    The limit follows from the sampling frequency and the FFT size used.
    Whenever F0 is below this threshold the spectrum will be analyzed as if
    the frame is unvoiced (using kDefaultF0 defined in constantnumbers.h).

    Parameters
    ----------
    fs : int
        Sample rate of input signal in Hz.
    fft_size : int
        FFT size used for CheapTrick.

    Returns
    -------
    f0_floor : float
        Resulting lower F0 limit in Hz.
    """
    return GetF0FloorForCheapTrick(fs, fft_size)
273 |
def cheaptrick(np.ndarray[double, ndim=1, mode="c"] x not None,
               np.ndarray[double, ndim=1, mode="c"] f0 not None,
               np.ndarray[double, ndim=1, mode="c"] temporal_positions not None,
               int fs,
               q1=-0.15, f0_floor=default_f0_floor, fft_size=None):
    """CheapTrick harmonic spectral envelope estimation algorithm.

    Parameters
    ----------
    x : ndarray
        Input waveform signal.
    f0 : ndarray
        Input F0 contour.
    temporal_positions : ndarray
        Temporal positions of each frame. Must contain at least as many
        entries as `f0`.
    fs : int
        Sample rate of input signal in Hz.
    q1 : float
        Spectral recovery parameter.
        Default: -0.15 (this value was tuned and normally does not need adjustment)
    f0_floor : float
        Lower F0 limit in Hz. Not used in case `fft_size` is specified.
        Default: 71.0
    fft_size : int, None
        FFT size to be used. When `None` (default) is used, the FFT size is computed
        automatically as a function of the given input sample rate and F0 floor.
        When `fft_size` is specified, the given `f0_floor` parameter is ignored.
        Default: None

    Returns
    -------
    spectrogram : ndarray
        Spectral envelope (squared magnitude), shape
        `(len(f0), fft_size // 2 + 1)`.

    Raises
    ------
    ValueError
        If `temporal_positions` has fewer entries than `f0`.
    """
    # The C routine is handed len(f0) as the frame count for both buffers;
    # a shorter `temporal_positions` would let it run past the end.
    if temporal_positions.shape[0] < f0.shape[0]:
        raise ValueError('Mismatched number of frames between F0 ({:d}) '
                         'and temporal positions ({:d})'
                         .format(f0.shape[0], temporal_positions.shape[0]))

    cdef CheapTrickOption option
    InitializeCheapTrickOption(fs, &option)
    option.q1 = q1
    if fft_size is None:
        option.f0_floor = f0_floor  # CheapTrickOption.f0_floor is only used in GetFFTSizeForCheapTrick()
        option.fft_size = GetFFTSizeForCheapTrick(fs, &option)
    else:
        option.fft_size = fft_size
        # the f0_floor used by CheapTrick() will be re-computed from this given fft_size
    cdef int x_length = len(x)
    cdef int f0_length = len(f0)

    cdef double[:, ::1] spectrogram = np.zeros((f0_length, option.fft_size//2 + 1),
                                               dtype=np.dtype('float64'))
    # `tmp` is pointer-sized integer storage reused as the array of row
    # pointers (double **) that the C API expects; the cast is explicit
    # because the element types differ.
    cdef np.intp_t[:] tmp = np.zeros(f0_length, dtype=np.intp)
    cdef double **cpp_spectrogram = <double **> (&tmp[0])
    cdef np.intp_t i
    with (nogil, cython.boundscheck(False)):
        for i in range(f0_length):
            cpp_spectrogram[i] = &spectrogram[i, 0]

        CheapTrick(&x[0], x_length, fs, &temporal_positions[0],
                   &f0[0], f0_length, &option, cpp_spectrogram)
    return np.array(spectrogram, dtype=np.float64)
332 |
333 |
def d4c(np.ndarray[double, ndim=1, mode="c"] x not None,
        np.ndarray[double, ndim=1, mode="c"] f0 not None,
        np.ndarray[double, ndim=1, mode="c"] temporal_positions not None,
        int fs,
        threshold=0.85, fft_size=None):
    """D4C aperiodicity estimation algorithm.

    Parameters
    ----------
    x : ndarray
        Input waveform signal.
    f0 : ndarray
        Input F0 contour.
    temporal_positions : ndarray
        Temporal positions of each frame. Must contain at least as many
        entries as `f0`.
    fs : int
        Sample rate of input signal in Hz.
    threshold : float
        Threshold for aperiodicity-based voiced/unvoiced decision, in range 0 to 1.
        If a value of 0 is used, voiced frames will be kept voiced. If a value > 0 is
        used some voiced frames can be considered unvoiced by setting their aperiodicity
        to 1 (thus synthesizing them with white noise). Using `threshold=0` will result
        in the behavior of older versions of D4C. The current default of 0.85 is meant
        to be used in combination with the Harvest F0 estimator, which was designed to have
        a high voiced/unvoiced threshold (i.e. most frames will be considered voiced).
        Default: 0.85
    fft_size : int, None
        FFT size to be used. When `None` (default) is used, the FFT size is computed
        automatically as a function of the given input sample rate and the default F0 floor.
        When `fft_size` is specified, it should match the FFT size used to compute
        the spectral envelope (i.e. `fft_size=2*(sp.shape[1] - 1)`) in order to get the
        desired results when resynthesizing.
        Default: None

    Returns
    -------
    aperiodicity : ndarray
        Aperiodicity (envelope, linear magnitude relative to spectral envelope),
        shape `(len(f0), fft_size // 2 + 1)`.

    Raises
    ------
    ValueError
        If `temporal_positions` has fewer entries than `f0`.
    """
    # The C routine is handed len(f0) as the frame count for both buffers;
    # a shorter `temporal_positions` would let it run past the end.
    if temporal_positions.shape[0] < f0.shape[0]:
        raise ValueError('Mismatched number of frames between F0 ({:d}) '
                         'and temporal positions ({:d})'
                         .format(f0.shape[0], temporal_positions.shape[0]))

    cdef int x_length = len(x)
    cdef int f0_length = len(f0)
    cdef int fft_size0
    if fft_size is None:
        # Same FFT size cheaptrick() picks when called with its defaults.
        fft_size0 = get_cheaptrick_fft_size(fs, default_f0_floor)
    else:
        fft_size0 = fft_size

    cdef D4COption option
    InitializeD4COption(&option)
    option.threshold = threshold

    cdef double[:, ::1] aperiodicity = np.zeros((f0_length, fft_size0//2 + 1),
                                                dtype=np.dtype('float64'))
    # `tmp` is pointer-sized integer storage reused as the array of row
    # pointers (double **) that the C API expects; the cast is explicit
    # because the element types differ.
    cdef np.intp_t[:] tmp = np.zeros(f0_length, dtype=np.intp)
    cdef double **cpp_aperiodicity = <double **> (&tmp[0])
    cdef np.intp_t i
    with (nogil, cython.boundscheck(False)):
        for i in range(f0_length):
            cpp_aperiodicity[i] = &aperiodicity[i, 0]

        D4C(&x[0], x_length, fs, &temporal_positions[0],
            &f0[0], f0_length, fft_size0, &option,
            cpp_aperiodicity)
    return np.array(aperiodicity, dtype=np.float64)
401 |
402 |
def synthesize(np.ndarray[double, ndim=1, mode="c"] f0 not None,
               np.ndarray[double, ndim=2, mode="c"] spectrogram not None,
               np.ndarray[double, ndim=2, mode="c"] aperiodicity not None,
               int fs,
               double frame_period=default_frame_period):
    """WORLD synthesis from parametric representation.

    Parameters
    ----------
    f0 : ndarray
        Input F0 contour.
    spectrogram : ndarray
        Spectral envelope.
    aperiodicity : ndarray
        Aperiodicity envelope.
    fs : int
        Sample rate of input signal in Hz.
    frame_period : float
        Period between consecutive frames in milliseconds.
        Default: 5.0

    Returns
    -------
    y : ndarray
        Output waveform signal.

    Raises
    ------
    ValueError
        If the three inputs disagree on the number of frames, or if
        `spectrogram` and `aperiodicity` differ in their second dimension.
    """
    if (f0.shape[0] != spectrogram.shape[0] or
            f0.shape[0] != aperiodicity.shape[0]):
        raise ValueError('Mismatched number of frames between F0 ({:d}), '
                         'spectrogram ({:d}) and aperiodicty ({:d})'
                         .format(f0.shape[0], spectrogram.shape[0],
                                 aperiodicity.shape[0]))
    if spectrogram.shape[1] != aperiodicity.shape[1]:
        raise ValueError('Mismatched dimensionality (spec size) between '
                         'spectrogram ({:d}) and aperiodicity ({:d})'
                         .format(spectrogram.shape[1], aperiodicity.shape[1]))

    cdef int f0_length = len(f0)
    # Frames * frame period (ms) * samples per ms; the product is a double,
    # so the truncation to a sample count is made explicit.
    cdef int y_length = <int> (f0_length * frame_period * fs / 1000)
    # Each row holds fft_size // 2 + 1 bins, so invert that relation.
    cdef int fft_size = (spectrogram.shape[1] - 1)*2
    cdef np.ndarray[double, ndim=1, mode="c"] y = \
        np.zeros(y_length, dtype=np.dtype('float64'))

    cdef double[:, ::1] spectrogram0 = spectrogram
    cdef double[:, ::1] aperiodicity0 = aperiodicity
    # `tmp`/`tmp2` are pointer-sized integer storage reused as the arrays of
    # row pointers (double **) that the C API expects.
    cdef np.intp_t[:] tmp = np.zeros(f0_length, dtype=np.intp)
    cdef np.intp_t[:] tmp2 = np.zeros(f0_length, dtype=np.intp)
    cdef double **cpp_spectrogram = <double **> (&tmp[0])
    cdef double **cpp_aperiodicity = <double **> (&tmp2[0])
    cdef np.intp_t i
    with (nogil, cython.boundscheck(False)):
        for i in range(f0_length):
            cpp_spectrogram[i] = &spectrogram0[i, 0]
            cpp_aperiodicity[i] = &aperiodicity0[i, 0]

        Synthesis(&f0[0], f0_length, cpp_spectrogram,
                  cpp_aperiodicity, fft_size, frame_period, fs, y_length, &y[0])
    return y
461 |
462 |
def get_num_aperiodicities(fs):
    """Return the number of coefficients used to code D4C aperiodicity.

    Parameters
    ----------
    fs : int
        Sample rate of input signal in Hz.

    Returns
    -------
    n_aper : int
        Required number of coefficients.
    """
    return GetNumberOfAperiodicities(fs)
478 |
def code_aperiodicity(np.ndarray[double, ndim=2, mode="c"] aperiodicity, fs):
    """Reduce dimensionality of D4C aperiodicity.

    Parameters
    ----------
    aperiodicity : ndarray
        Aperiodicity envelope.
    fs : int
        Sample rate of input signal in Hz.

    Returns
    -------
    coded_aperiodicity : ndarray
        Coded aperiodicity envelope, shape
        `(len(aperiodicity), get_num_aperiodicities(fs))`.
    """
    cdef int ap_length = len(aperiodicity)
    # Each row holds fft_size // 2 + 1 bins, so invert that relation.
    cdef int fft_size = (aperiodicity.shape[1] - 1)*2
    cdef int n_coded_aper = get_num_aperiodicities(fs)

    cdef double[:, ::1] aper = aperiodicity
    cdef double[:, ::1] coded_aper = np.zeros((ap_length, n_coded_aper),
                                              dtype=np.dtype('float64'))
    # `tmp1`/`tmp2` are pointer-sized integer storage reused as the arrays
    # of row pointers (double **) that the C API expects.
    cdef np.intp_t[:] tmp1 = np.zeros(ap_length, dtype=np.intp)
    cdef np.intp_t[:] tmp2 = np.zeros(ap_length, dtype=np.intp)
    cdef double **cpp_aper = <double **> (&tmp1[0])
    cdef double **cpp_coded_aper = <double **> (&tmp2[0])
    cdef np.intp_t i
    for i in range(ap_length):
        cpp_aper[i] = &aper[i, 0]
        cpp_coded_aper[i] = &coded_aper[i, 0]

    CodeAperiodicity(cpp_aper, ap_length, fs,
                     fft_size, cpp_coded_aper)

    return np.array(coded_aper, dtype=np.float64)
514 |
def decode_aperiodicity(np.ndarray[double, ndim=2, mode="c"] coded_aperiodicity,
                        fs, fft_size):
    """Restore full dimensionality of coded D4C aperiodicity.

    Parameters
    ----------
    coded_aperiodicity : ndarray
        Coded aperiodicity envelope.
    fs : int
        Sample rate of input signal in Hz.
    fft_size : int
        FFT size corresponding to the full dimensional aperiodicity.

    Returns
    -------
    aperiodicity : ndarray
        Aperiodicity envelope, shape
        `(len(coded_aperiodicity), fft_size // 2 + 1)`.

    Raises
    ------
    ValueError
        If the second dimension of `coded_aperiodicity` differs from
        `get_num_aperiodicities(fs)`.
    """
    cdef int ap_length = len(coded_aperiodicity)
    cdef int n_coded_aper = get_num_aperiodicities(fs)
    if n_coded_aper != coded_aperiodicity.shape[1]:
        raise ValueError('Invalid aperiodicity code dimensionality '
                         '(was: {:d}, expected: {:d})'
                         .format(coded_aperiodicity.shape[1], n_coded_aper))

    cdef double[:, ::1] coded_aper = coded_aperiodicity
    cdef double[:, ::1] aper = np.zeros((ap_length, fft_size//2 + 1),
                                        dtype=np.dtype('float64'))
    # `tmp1`/`tmp2` are pointer-sized integer storage reused as the arrays
    # of row pointers (double **) that the C API expects.
    cdef np.intp_t[:] tmp1 = np.zeros(ap_length, dtype=np.intp)
    cdef np.intp_t[:] tmp2 = np.zeros(ap_length, dtype=np.intp)
    cdef double **cpp_coded_aper = <double **> (&tmp1[0])
    cdef double **cpp_aper = <double **> (&tmp2[0])
    cdef np.intp_t i
    for i in range(ap_length):
        cpp_coded_aper[i] = &coded_aper[i, 0]
        cpp_aper[i] = &aper[i, 0]

    DecodeAperiodicity(cpp_coded_aper, ap_length, fs, fft_size, cpp_aper)

    return np.array(aper, dtype=np.float64)
555 |
def code_spectral_envelope(np.ndarray[double, ndim=2, mode="c"] spectrogram, fs,
                           number_of_dimensions):
    """Reduce dimensionality of spectral envelope.

    Parameters
    ----------
    spectrogram : ndarray
        Spectral envelope.
    fs : int
        Sample rate of input signal in Hz.
    number_of_dimensions : int
        Number of dimensions of the coded spectral envelope.

    Returns
    -------
    coded_spectral_envelope : ndarray
        Coded spectral envelope, shape
        `(len(spectrogram), number_of_dimensions)`.
    """
    cdef int sp_length = len(spectrogram)
    # Each row holds fft_size // 2 + 1 bins, so invert that relation.
    cdef int fft_size = (spectrogram.shape[1] - 1)*2

    cdef double[:, ::1] sp = spectrogram
    cdef double[:, ::1] coded_sp = np.zeros((sp_length, number_of_dimensions),
                                            dtype=np.dtype('float64'))
    # `tmp1`/`tmp2` are pointer-sized integer storage reused as the arrays
    # of row pointers (double **) that the C API expects.
    cdef np.intp_t[:] tmp1 = np.zeros(sp_length, dtype=np.intp)
    cdef np.intp_t[:] tmp2 = np.zeros(sp_length, dtype=np.intp)
    cdef double **cpp_sp = <double **> (&tmp1[0])
    cdef double **cpp_coded_sp = <double **> (&tmp2[0])
    cdef np.intp_t i
    for i in range(sp_length):
        cpp_sp[i] = &sp[i, 0]
        cpp_coded_sp[i] = &coded_sp[i, 0]

    CodeSpectralEnvelope(cpp_sp, sp_length, fs, fft_size,
                         number_of_dimensions, cpp_coded_sp)

    return np.array(coded_sp, dtype=np.float64)
593 |
def decode_spectral_envelope(np.ndarray[double, ndim=2, mode="c"] coded_spectral_envelope,
                             fs, fft_size):
    """Restore full dimensionality of coded spectral envelope.

    Parameters
    ----------
    coded_spectral_envelope : ndarray
        Coded spectral envelope.
    fs : int
        Sample rate of input signal in Hz.
    fft_size : int
        FFT size corresponding to the full dimensional spectral envelope.

    Returns
    -------
    spectrogram : ndarray
        Spectral envelope, shape
        `(len(coded_spectral_envelope), fft_size // 2 + 1)`.
    """
    cdef int sp_length = len(coded_spectral_envelope)
    # Read the coded width from the array shape rather than indexing row 0
    # through Python, matching how the sibling codec wrappers do it.
    cdef int number_of_dimensions = coded_spectral_envelope.shape[1]
    cdef double[:, ::1] coded_sp = coded_spectral_envelope
    cdef double[:, ::1] sp = np.zeros((sp_length, fft_size//2 + 1),
                                      dtype=np.dtype('float64'))
    # `tmp1`/`tmp2` are pointer-sized integer storage reused as the arrays
    # of row pointers (double **) that the C API expects.
    cdef np.intp_t[:] tmp1 = np.zeros(sp_length, dtype=np.intp)
    cdef np.intp_t[:] tmp2 = np.zeros(sp_length, dtype=np.intp)
    cdef double **cpp_coded_sp = <double **> (&tmp1[0])
    cdef double **cpp_sp = <double **> (&tmp2[0])
    cdef np.intp_t i
    for i in range(sp_length):
        cpp_coded_sp[i] = &coded_sp[i, 0]
        cpp_sp[i] = &sp[i, 0]

    DecodeSpectralEnvelope(cpp_coded_sp, sp_length, fs, fft_size,
                           number_of_dimensions, cpp_sp)

    return np.array(sp, dtype=np.float64)
630 |
def wav2world(x, fs, fft_size=None, frame_period=default_frame_period):
    """Run the complete WORLD analysis chain in one call.

    Only `fft_size` and `frame_period` can be configured here; every other
    parameter of the underlying steps keeps its default. F0 estimation is
    fixed to DIO followed by StoneMask refinement.

    Parameters
    ----------
    x : ndarray
        Input waveform signal.
    fs : int
        Sample rate of input signal in Hz.
    fft_size : int
        Length of Fast Fourier Transform (in number of samples).
        The resulting dimension of `ap` and `sp` will be `fft_size` // 2 + 1.
        Default: None (forwarded unchanged to `cheaptrick` and `d4c`)
    frame_period : float
        Period between consecutive frames in milliseconds.
        Default: 5.0

    Returns
    -------
    f0 : ndarray
        F0 contour.
    sp : ndarray
        Spectral envelope.
    ap : ndarray
        Aperiodicity.
    """
    # Coarse F0 track plus frame times, then refinement on the same frames.
    coarse_f0, frame_times = dio(x, fs, frame_period=frame_period)
    refined_f0 = stonemask(x, coarse_f0, frame_times, fs)
    # Both envelopes receive the same fft_size value.
    envelope = cheaptrick(x, refined_f0, frame_times, fs, fft_size=fft_size)
    aperiodicity = d4c(x, refined_f0, frame_times, fs, fft_size=fft_size)
    return refined_f0, envelope, aperiodicity
665 |
--------------------------------------------------------------------------------