├── .gitignore ├── .gitmodules ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── appveyor.yml ├── demo ├── demo.py └── utterance │ └── vaiueo2d.wav ├── pyproject.toml ├── pyworld ├── __init__.py └── pyworld.pyx ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | test/ 2 | *.cpp 3 | *.pyd 4 | .pypirc 5 | .vscode 6 | 7 | # Created by https://www.gitignore.io 8 | 9 | ### Python ### 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | 66 | ### IPythonNotebook ### 67 | # Temporary data 68 | .ipynb_checkpoints/ 69 | 70 | 71 | ### SublimeText ### 72 | # cache files for sublime text 73 | *.tmlanguage.cache 74 | *.tmPreferences.cache 75 | *.stTheme.cache 76 | 77 | # workspace files are user-specific 78 | *.sublime-workspace 79 | 80 | # project files should be checked into the repository, unless a significant 81 | # proportion of contributors will probably not be using SublimeText 82 | # *.sublime-project 83 | 84 | # sftp configuration file 85 | sftp-config.json 86 | 87 | 88 | ### Emacs ### 89 | # -*- mode: gitignore; -*- 90 | *~ 91 | \#*\# 92 | /.emacs.desktop 93 | /.emacs.desktop.lock 94 | *.elc 95 | auto-save-list 96 | tramp 97 | .\#* 98 | 99 | # Org-mode 100 | .org-id-locations 101 | *_archive 102 | 103 | # flymake-mode 104 | *_flymake.* 105 | 106 | # eshell files 107 | /eshell/history 108 | /eshell/lastdir 109 | 110 | # elpa packages 111 | /elpa/ 112 | 113 | # reftex files 114 | *.rel 115 | 116 | # AUCTeX auto folder 117 | /auto/ 118 | 119 | # cask packages 120 | .cask/ 121 | 122 | 123 | ### Vim ### 124 | [._]*.s[a-w][a-z] 125 | [._]s[a-w][a-z] 126 | *.un~ 127 | Session.vim 128 | .netrwhist 129 | *~ 130 | 131 | 132 | ### C++ ### 133 | # Compiled Object files 134 | *.slo 135 | *.lo 136 | *.o 137 | *.obj 138 | 139 | # Precompiled Headers 140 | *.gch 141 | *.pch 142 | 143 | # Compiled Dynamic libraries 144 | *.so 145 | *.dylib 146 | *.dll 147 | 148 | # Fortran module files 149 | *.mod 150 | 151 | # Compiled Static libraries 152 | *.lai 153 | *.la 154 | *.a 155 | *.lib 156 | 157 | # Executables 158 | *.exe 159 | *.out 160 | *.app 161 | 162 | 163 | ### OSX ### 164 | .DS_Store 165 | .AppleDouble 166 | .LSOverride 167 | 168 | # Icon must end with two \r 169 | Icon 170 | 171 | 172 | # Thumbnails 173 | ._* 174 | 175 | # Files that might appear on external disk 176 | .Spotlight-V100 177 | .Trashes 178 | 179 | # Directories potentially created on remote AFP share 180 | .AppleDB 181 | .AppleDesktop 182 | Network Trash Folder 183 | Temporary Items 184 | .apdisk 185 | 186 | 187 | ### Linux ### 188 | *~ 189 | 190 | # KDE directory preferences 191 | .directory 192 | 193 | # Linux trash folder which might appear on any partition or disk 194 | .Trash-* 195 | test/ 196 | *.cpp 197 | *.pyd 198 | .pypirc 199 | .vscode 200 | 201 | # Created by https://www.gitignore.io 202 | 203 | ### Python ### 204 | # Byte-compiled / optimized / DLL files 205 | __pycache__/ 206 | *.py[cod] 207 | 208 | # C extensions 209 | *.so 210 | 211 | # Distribution / packaging 212 | .Python 213 | env/ 214 | build/ 215 | develop-eggs/ 216 | dist/ 217 | downloads/ 218 | eggs/ 219 | lib/ 220 | lib64/ 221 | parts/ 222 | sdist/ 223 | var/ 224 | *.egg-info/ 225 | .installed.cfg 226 | *.egg 227 | 228 | # PyInstaller 229 | # Usually these files are written by a python script from a template 230 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 231 | *.manifest 232 | *.spec 233 | 234 | # Installer logs 235 | pip-log.txt 236 | pip-delete-this-directory.txt 237 | 238 | # Unit test / coverage reports 239 | htmlcov/ 240 | .tox/ 241 | .coverage 242 | .cache 243 | nosetests.xml 244 | coverage.xml 245 | 246 | # Translations 247 | *.mo 248 | *.pot 249 | 250 | # Django stuff: 251 | *.log 252 | 253 | # Sphinx documentation 254 | docs/_build/ 255 | 256 | # PyBuilder 257 | target/ 258 | 259 | 260 | ### IPythonNotebook ### 261 | # Temporary data 262 | .ipynb_checkpoints/ 263 | 264 | 265 | ### SublimeText ### 266 | # cache files for sublime text 267 | *.tmlanguage.cache 268 | *.tmPreferences.cache 269 | *.stTheme.cache 270 | 271 | # workspace files are user-specific 272 | *.sublime-workspace 273 | 274 | # project files should be checked into the repository, unless a significant 275 | # proportion of contributors will probably not be using SublimeText 276 | # *.sublime-project 277 | 278 | # sftp configuration file 279 | sftp-config.json 280 | 281 | 282 | ### Emacs ### 283 | # -*- mode: gitignore; -*- 284 | *~ 285 | \#*\# 286 | /.emacs.desktop 287 | /.emacs.desktop.lock 288 | *.elc 289 | auto-save-list 290 | tramp 291 | .\#* 292 | 293 | # Org-mode 294 | .org-id-locations 295 | *_archive 296 | 297 | # flymake-mode 298 | *_flymake.* 299 | 300 | # eshell files 301 | /eshell/history 302 | /eshell/lastdir 303 | 304 | # elpa packages 305 | /elpa/ 306 | 307 | # reftex files 308 | *.rel 309 | 310 | # AUCTeX auto folder 311 | /auto/ 312 | 313 | # cask packages 314 | .cask/ 315 | 316 | 317 | ### Vim ### 318 | [._]*.s[a-w][a-z] 319 | [._]s[a-w][a-z] 320 | *.un~ 321 | Session.vim 322 | .netrwhist 323 | *~ 324 | 325 | 326 | ### C++ ### 327 | # Compiled Object files 328 | *.slo 329 | *.lo 330 | *.o 331 | *.obj 332 | 333 | # Precompiled Headers 334 | *.gch 335 | *.pch 336 | 337 | # Compiled Dynamic libraries 338 | *.so 339 | *.dylib 340 | *.dll 341 | 342 | # Fortran module files 343 | *.mod 344 | 345 | # Compiled Static libraries 346 | *.lai 347 | *.la 348 | *.a 349 | *.lib 350 | 351 | # Executables 352 | *.exe 353 | *.out 354 | *.app 355 | 356 | 357 | ### OSX ### 358 | .DS_Store 359 | .AppleDouble 360 | .LSOverride 361 | 362 | # Icon must end with two \r 363 | Icon 364 | 365 | 366 | # Thumbnails 367 | ._* 368 | 369 | # Files that might appear on external disk 370 | .Spotlight-V100 371 | .Trashes 372 | 373 | # Directories potentially created on remote AFP share 374 | .AppleDB 375 | .AppleDesktop 376 | Network Trash Folder 377 | Temporary Items 378 | .apdisk 379 | 380 | 381 | ### Linux ### 382 | *~ 383 | 384 | # KDE directory preferences 385 | .directory 386 | 387 | # Linux trash folder which might appear on any partition or disk 388 | .Trash-* 389 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib/World"] 2 | path = lib/World 3 | url = https://github.com/mmorise/World 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: 2 | - python 3 | 4 | python: 5 | - "2.7" 6 | - "3.5" 7 | - "3.6" 8 | - "3.7" 9 | - "3.8" 10 | - "3.9" 11 | # - "3.10" # this doesn't work 12 | 13 | env: 14 | global: 15 | TWINE_USERNAME: JeremyCCHsu 16 | TWINE_PASSWORD: 17 | secure: K22xeLmYvib1wUQyRJOQ+wjvFVK88HNCsGWVXdCFaIfR9F5QAKMmh+RsyKCS5JdMdZF+RtDDgcfsMOIlscGm/crgwzAN9OijKmj4WZGDPR4BY8O0TZGT2n1tfRJjGdmEwMfftmIfHRjX91iZjCW5k7EAwYvV0d+hAdUrSI1FGOKYPpMVusMYNih3AtPbobpTCwgaKddwCVw1YoCEEgSuXfpyZfQzEirEGViJfXr6EmojVoVYi1oYlw4OhAKg+OVfTxVEVsT37xbPp0sjcADYVWr1kXAWF86fGyon+eSHMc5AewoP8tMNSeqyZuQ23EHx2xuK8Ipjv3C0xy+RmMPBdsi5xhbclElNvBH3OjL623x5JYziU+rsckh/6687vFzLg5twlfRDfwFwKHA4UVdSP0Uei8ebltHJwufyKiKjVMllJpbc9NOKJjSF+n0/ddwaebYbWgeZo6wvOnvuZb1d3TyGUktNfdypINNkd6D/RFd6l3kN4IhvBJqP7bZsSsrFoIdDL6to005Hy1Jb8IuUFihFC5BbWcFqd3BefOvZWmhpkZJRkTEN6pCEf0oNFxtWL6NuxwkbVZVcBgKAHAAz/4rKh6r0IipqfWfxKU3N9Phz998LfNwK2ZkQ32gDYdJ5abTY7v50J6s99U4DzSOaD4c3NoYE/VxyW/G1yo2FclQ= 18 | 19 | # works on 3.10 but requires re-installing numpy 20 | matrix: 21 | include: 22 | - python: 3.10 23 | dist: jammy 24 | sudo: true 25 | - python: 3.11 26 | dist: jammy 27 | sudo: true 28 | 29 | before_install: 30 | - sudo apt-get install -y libsndfile1 31 | - sudo apt-get install -y libssl3 32 | 33 | install: 34 | - pip install -U pip 35 | - pip install -r requirements.txt 36 | - pip install -e . 37 | 38 | script: 39 | - cd demo 40 | - python demo.py 41 | - cd .. 42 | 43 | 44 | # before_deploy: 45 | # - python setup.py bdist_wheel 46 | 47 | deploy: 48 | provider: pypi 49 | user: __token__ 50 | password: 51 | secure: WF5VmRL0iTK/QPwlMzURfzNdjdvYbPf9QeiEImyaoofS0nsLOARQM+lPuKnXQdv206DOstrokzsYKfLJtfXflTB6AvVSD4OUQbzzpc+rKtHcg6GN+X27zvaf/8NnxOeaEKEcTvP5hEaGxP3Q7h7q3Q51Cwbl/aa0MmHHvik9l2KEVGvDvviieQzQWGxi2YH9xMl8kwpB62Byy13Fk3Jg0jr4DAO7PzIUjlT59GZ1K3elAFqREcJ64+RZQTMHi3Kikiq1QOU1etAq36iGag1RfW5sd35LS4b0CkCaGVfhrv7RG/oQtHPR+3bAedGKkJAUj+xo0ra+9+L0D8OJ8/MwlHXapd24+WGGMdAcUuKvaahj1D+Bn09XNcw0wLu6fCllPbfTMNFWHbB5SbnFhICW2mXNhl1KJ+IQmGW3mW8VHW+1bF6+XCCMzRVezetG6XLVY9ptDxM3mE3EWpaU102g9NXaWkBb/5FuNj+QO1P86WonpQTQLGMnAx5veVMiUhDEeXi6aMelL8hZnimdYaa/5vOJqo6k1OjzPNEhlrazYaU18OI+/Gtg7iDTdu99yHDMiAeq4kRN5lBfiX0PZrttZoJW0P0u44OO7MrQJ54Lz64KevdxPynh9WGJVWLkNaql3d91jvs0xaQwu1X3gbnhE19aEtMICwEfS53TYxl/fWs= 52 | on: 53 | tags: true 54 | skip_cleanup: true 55 | skip_existing: true 56 | # distributions: "sdist bdist_wheel" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright 2016 pyworld contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENSE 2 | recursive-include lib *.cpp *.h LICENSE.txt 3 | recursive-include pyworld *.cpp *.pxd *.pyx 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyWORLD - A Python wrapper of WORLD Vocoder 2 | 3 | 4 | | **`Linux`** | **`Windows`** | 5 | |-----------------|-----------| 6 | | [![Build Status](https://app.travis-ci.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder.svg?branch=master)](https://app.travis-ci.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder) | [![Build Status](https://ci.appveyor.com/api/projects/status/github/JeremyCCHsu/Python-Wrapper-for-World-Vocoder?svg=true)](https://ci.appveyor.com/project/JeremyCCHsu/python-wrapper-for-world-vocoder) | 7 | 8 | 9 | 10 | 11 | WORLD Vocoder is a fast and high-quality vocoder 12 | which parameterizes speech into three components: 13 | 14 | 1. `f0`: Pitch contour 15 | 2. `sp`: Harmonic spectral envelope 16 | 3. `ap`: Aperiodic spectral envelope (relative to the harmonic spectral envelope) 17 | 18 | It can also (re)synthesize speech using these features (see examples below). 19 | 20 | For more information, please visit Dr. Morise's [WORLD repository](https://github.com/mmorise/World) 21 | and the [official website of WORLD Vocoder](http://ml.cs.yamanashi.ac.jp/world/english) 22 | 23 | 24 | ## APIs 25 | 26 | ### Vocoder Functions 27 | ```python 28 | import pyworld as pw 29 | _f0, t = pw.dio(x, fs) # raw pitch extractor 30 | f0 = pw.stonemask(x, _f0, t, fs) # pitch refinement 31 | sp = pw.cheaptrick(x, f0, t, fs) # extract smoothed spectrogram 32 | ap = pw.d4c(x, f0, t, fs) # extract aperiodicity 33 | 34 | y = pw.synthesize(f0, sp, ap, fs) # synthesize an utterance using the parameters 35 | ``` 36 | 37 | 38 | ### Utility 39 | ```python 40 | # Convert speech into features (using default arguments) 41 | f0, sp, ap = pw.wav2world(x, fs) 42 | ``` 43 |
44 | 45 | You can change the default arguments of the function, too. 46 | See more info using `help`. 47 | 48 | 49 | ## Installation 50 | 51 | ### Using Pip 52 | `pip install pyworld` 53 |
54 | 55 | ### Building from Source 56 | ```bash 57 | git clone https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder.git 58 | cd Python-Wrapper-for-World-Vocoder 59 | git submodule update --init 60 | pip install -U pip 61 | pip install -r requirements.txt 62 | pip install . 63 | ``` 64 | It will automatically `git clone` Morise's World Vocoder (C++ version).
65 | (It seems to me that using `virtualenv` or `conda` is the best practice.)
66 |
67 | 68 | ### Installation Validation 69 | You can validate installation by running 70 | ```bash 71 | cd demo 72 | python demo.py 73 | ``` 74 | to see if you get results in `test/` direcotry. 75 | (Please avoid writing and executing codes in the `Python-Wrapper-for-World-Vocoder` folder for now.)
76 | 77 | ### Environment/Dependencies 78 | - Operating systems 79 | - Linux Ubuntu 14.04+ 80 | - Windows (thanks to [wuaalb](https://github.com/wuaalb)) 81 | - WSL 82 | - Python 83 | - 3.7+ 84 | 85 | You can install dependencies these by `pip install -r requirements.txt` 86 | 87 | 88 | 89 | ## Notice 90 | - WORLD vocoder is designed for speech sampled ≥ 16 kHz. 91 | Applying WORLD to 8 kHz speech will fail. 92 | See a possible workaround [here](https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/issues/54). 93 | - When the SNR is low, extracting pitch using `harvest` instead of `dio` 94 | is a better option. 95 | 96 | 97 | ## Troubleshooting 98 | 1. Upgrade your Cython version to 0.24.
99 | (I failed to build it on Cython 0.20.1post0)
100 | It'll require you to download Cython form http://cython.org/
101 | Unzip it, and `python setup.py install` it.
102 | (I tried `pip install Cython` but the upgrade didn't seem correct)
103 | (Again, add `--user` if you don't have root access.) 104 | 2. Upon executing `demo/demo.py`, the following code might be needed in some environments (e.g. when you're working on a remote Linux server):
105 | 106 | ```python 107 | import matplotlib 108 | matplotlib.use('Agg') 109 | ``` 110 | 3. If you encounter `library not found: sndfile` error upon executing `demo.py`, 111 | you might have to install it by `apt-get install libsoundfile1`. 112 | You can also replace `pysoundfile` with `scipy` or `librosa`, but some modification is needed: 113 | - librosa: 114 | - load(fiilename, dtype=np.float64) 115 | - output.write_wav(filename, wav, fs) 116 | - remember to pass `dtype` argument to ensure that the method gives you a `double`. 117 | - scipy: 118 | - You'll have to write a customized utility function based on the following methods 119 | - scipy.io.wavfile.read (but this gives you `short`) 120 | - scipy.io.wavfile.write 121 | 122 | 4. If you have installation issue on Windows, I probably could not provide 123 | much help because my development environment is Ubuntu 124 | and Windows Subsystem for Linux ([read this if you are interested in installing it](https://github.com/JeremyCCHsu/wsl)). 125 | 126 | 127 | ### Other Installation Suggestions 128 | 1. Use `pip install .` is safer and you can easily uninstall pyworld by `pip uninstall pyworld` 129 | - For Mac users: You might need to do `MACOSX_DEPLOYMENT_TARGET=10.9 pip install .` See [issue](https://github.com/SeanNaren/warp-ctc/issues/129#issuecomment-502349652). 130 | 2. Another way to install pyworld is via
131 | `python setup.py install`
132 | - Add `--user` if you don't have root access
133 | - Add `--record install.txt` to track the installation dir
134 | 3. If you just want to try out some experiments, execute
135 | `python setup.py build_ext --inplace`
136 | Then you can use PyWorld from this directory.
137 | You can also copy the resulting **pyworld.so** (pyworld.{arch}.pyd on Windows) file to 138 | `~/.local/lib/python2.7/site-packages` (or corresponding Windows directory) 139 | so that you can use it everywhere like an installed package.
140 | Alternatively you can copy/symlink the compiled files using pip, e.g. `pip install -e .` 141 | 142 | 143 | 144 | ## Acknowledgement 145 | Thank all contributors ([tats-u](https://github.com/tats-u), [wuaalb](https://github.com/wuaalb), [r9y9](https://github.com/r9y9), [rikrd](https://github.com/rikrd), [kudan2510](https://github.com/kundan2510)) for making this repo better and [sotelo](https://github.com/sotelo) whose [world.py](https://github.com/sotelo/world.py) inspired this repo.
146 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | TWINE_USERNAME: __token__ 3 | TWINE_PASSWORD: 4 | secure: A+ks10m1F2uReMyFye6x7fEWxuKCAFTn3Qu+Lw8T/4g3ExgSz9N8LxIEvXC+RkQwjTarXJyCOM8er4OsfYbQzfIfdMxgsuEuSY3TiPj2AssIXuNp7Xps56cRT+AQqXMp4UJ913Ryq1yKvnJHq91c6Lptx7WtWvbixrUI6p+ocV8zb8rO37KHUXR864z9h87dpwiY7ovKhV5+RF6AR+rmdt7nsaPLAd3M65r7M9v/sebaMNjhau5sizD3cwxOlzh/xVWhXABcMd+VlIhtq3xa4g== 5 | 6 | matrix: 7 | - PYTHON_VERSION: "3.6" 8 | PYTHON_ARCH: "64" 9 | MINICONDA: C:\Miniconda3-x64 10 | 11 | - PYTHON_VERSION: "3.6" 12 | PYTHON_ARCH: "32" 13 | MINICONDA: C:\Miniconda3 14 | 15 | - PYTHON_VERSION: "3.7" 16 | PYTHON_ARCH: "64" 17 | MINICONDA: C:\Miniconda3-x64 18 | 19 | - PYTHON_VERSION: "3.7" 20 | PYTHON_ARCH: "32" 21 | MINICONDA: C:\Miniconda3 22 | 23 | - PYTHON_VERSION: "3.8" 24 | PYTHON_ARCH: "64" 25 | MINICONDA: C:\Miniconda3-x64 26 | 27 | # - PYTHON_VERSION: "3.8" 28 | # PYTHON_ARCH: "32" 29 | # MINICONDA: C:\Miniconda3 30 | 31 | - PYTHON_VERSION: "3.9" 32 | PYTHON_ARCH: "64" 33 | MINICONDA: C:\Miniconda3-x64 34 | 35 | # - PYTHON_VERSION: "3.9" 36 | # PYTHON_ARCH: "32" 37 | # MINICONDA: C:\Miniconda3 38 | 39 | - PYTHON_VERSION: "3.10" 40 | PYTHON_ARCH: "64" 41 | MINICONDA: C:\Miniconda3-x64 42 | 43 | # - PYTHON_VERSION: "3.10" 44 | # PYTHON_ARCH: "32" 45 | # MINICONDA: C:\Miniconda3 46 | 47 | - PYTHON_VERSION: "3.11" 48 | PYTHON_ARCH: "64" 49 | MINICONDA: C:\Miniconda3-x64 50 | 51 | # The above commented lines has problem executing the `test_script` 52 | # Python >= 3.8 (32-bit) 53 | 54 | # platform: x64 55 | 56 | init: 57 | - ps: | 58 | Write-Output "*** Python $env:PYTHON_VERSION ***" 59 | $env:Path += ";$env:PYTHON_ROOT;$env:PYTHON_ROOT\Scripts" 60 | 61 | install: 62 | - ps: | 63 | "INFO: Variables" 64 | "INFO: is a tag: $Env:APPVEYOR_REPO_TAG" 65 | "INFO: $APPVEYOR_REPO_TAG" 66 | "INFO: tag name: $Env:APPVEYOR_REPO_TAG_NAME" 67 | "INFO: $APPVEYOR_REPO_TAG_NAME" 68 | - "SET PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%" 69 | - conda config --set always_yes yes --set changeps1 no 70 | - conda update -q conda 71 | - conda config --add channels pypi 72 | - conda info -a 73 | - "conda create -q -n test-environment python=%PYTHON_VERSION% numpy cython wheel matplotlib" 74 | - activate test-environment 75 | - python -m pip install --no-cache-dir -U pip 76 | - ps: | 77 | Write-Output "Installing requirements..." 78 | pip install -r requirements.txt 79 | 80 | build_script: 81 | - git submodule update --init 82 | - pip install -e . 83 | 84 | test_script: 85 | - ps: | 86 | $ErrorActionPreference = "Stop" 87 | Set-Location "$env:APPVEYOR_BUILD_FOLDER\demo" 88 | python demo.py 89 | Set-Location "$env:APPVEYOR_BUILD_FOLDER" 90 | 91 | after_test: 92 | - pip wheel --wheel-dir=.\dist --no-deps . 93 | 94 | artifacts: 95 | - path: dist\* 96 | 97 | deploy_script: 98 | - ps: | 99 | $ErrorActionPreference = "Stop" 100 | if ($Env:APPVEYOR_REPO_TAG -eq "true"){ 101 | pip install twine 102 | pip install -U urllib3==1.26.6 103 | twine upload dist/*.whl 104 | } 105 | else{ 106 | "Not a tag (skipped)" 107 | "$APPVEYOR_REPO_TAG" 108 | "$APPVEYOR_REPO_TAG_NAME" 109 | } 110 | # -and $APPVEYOR_REPO_TAG_NAME -match "^v([0-9.a-z]+)$") { 111 | -------------------------------------------------------------------------------- /demo/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | 3 | import os 4 | from shutil import rmtree 5 | import argparse 6 | 7 | import numpy as np 8 | 9 | import matplotlib # Remove this line if you don't need them 10 | matplotlib.use('Agg') # Remove this line if you don't need them 11 | import matplotlib.pyplot as plt 12 | 13 | import soundfile as sf 14 | # import librosa 15 | import pyworld as pw 16 | 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument("-f", "--frame_period", type=float, default=5.0) 20 | parser.add_argument("-s", "--speed", type=int, default=1) 21 | 22 | 23 | EPSILON = 1e-8 24 | 25 | def savefig(filename, figlist, log=True): 26 | #h = 10 27 | n = len(figlist) 28 | # peek into instances 29 | f = figlist[0] 30 | if len(f.shape) == 1: 31 | plt.figure() 32 | for i, f in enumerate(figlist): 33 | plt.subplot(n, 1, i+1) 34 | if len(f.shape) == 1: 35 | plt.plot(f) 36 | plt.xlim([0, len(f)]) 37 | elif len(f.shape) == 2: 38 | Nsmp, dim = figlist[0].shape 39 | #figsize=(h * float(Nsmp) / dim, len(figlist) * h) 40 | #plt.figure(figsize=figsize) 41 | plt.figure() 42 | for i, f in enumerate(figlist): 43 | plt.subplot(n, 1, i+1) 44 | if log: 45 | x = np.log(f + EPSILON) 46 | else: 47 | x = f + EPSILON 48 | plt.imshow(x.T, origin='lower', interpolation='none', aspect='auto', extent=(0, x.shape[0], 0, x.shape[1])) 49 | else: 50 | raise ValueError('Input dimension must < 3.') 51 | plt.savefig(filename) 52 | # plt.close() 53 | 54 | 55 | def main(args): 56 | if os.path.isdir('test'): 57 | rmtree('test') 58 | os.mkdir('test') 59 | 60 | x, fs = sf.read('utterance/vaiueo2d.wav') 61 | # x, fs = librosa.load('utterance/vaiueo2d.wav', dtype=np.float64) 62 | 63 | # 1. A convient way 64 | f0, sp, ap = pw.wav2world(x, fs) # use default options 65 | y = pw.synthesize(f0, sp, ap, fs, pw.default_frame_period) 66 | 67 | # 2. Step by step 68 | # 2-1 Without F0 refinement 69 | _f0, t = pw.dio(x, fs, f0_floor=50.0, f0_ceil=600.0, 70 | channels_in_octave=2, 71 | frame_period=args.frame_period, 72 | speed=args.speed) 73 | _sp = pw.cheaptrick(x, _f0, t, fs) 74 | _ap = pw.d4c(x, _f0, t, fs) 75 | _y = pw.synthesize(_f0, _sp, _ap, fs, args.frame_period) 76 | # librosa.output.write_wav('test/y_without_f0_refinement.wav', _y, fs) 77 | sf.write('test/y_without_f0_refinement.wav', _y, fs) 78 | 79 | # 2-2 DIO with F0 refinement (using Stonemask) 80 | f0 = pw.stonemask(x, _f0, t, fs) 81 | sp = pw.cheaptrick(x, f0, t, fs) 82 | ap = pw.d4c(x, f0, t, fs) 83 | y = pw.synthesize(f0, sp, ap, fs, args.frame_period) 84 | # librosa.output.write_wav('test/y_with_f0_refinement.wav', y, fs) 85 | sf.write('test/y_with_f0_refinement.wav', y, fs) 86 | 87 | # 2-3 Harvest with F0 refinement (using Stonemask) 88 | _f0_h, t_h = pw.harvest(x, fs) 89 | f0_h = pw.stonemask(x, _f0_h, t_h, fs) 90 | sp_h = pw.cheaptrick(x, f0_h, t_h, fs) 91 | ap_h = pw.d4c(x, f0_h, t_h, fs) 92 | y_h = pw.synthesize(f0_h, sp_h, ap_h, fs, pw.default_frame_period) 93 | # librosa.output.write_wav('test/y_harvest_with_f0_refinement.wav', y_h, fs) 94 | sf.write('test/y_harvest_with_f0_refinement.wav', y_h, fs) 95 | 96 | # Comparison 97 | savefig('test/wavform.png', [x, _y, y]) 98 | savefig('test/sp.png', [_sp, sp]) 99 | savefig('test/ap.png', [_ap, ap], log=False) 100 | savefig('test/f0.png', [_f0, f0]) 101 | 102 | print('Please check "test" directory for output files') 103 | 104 | 105 | if __name__ == '__main__': 106 | args = parser.parse_args() 107 | main(args) 108 | -------------------------------------------------------------------------------- /demo/utterance/vaiueo2d.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/9e6cd3e6b2f813d1ffb910d1a0b18ab53f669086/demo/utterance/vaiueo2d.wav -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools", 4 | "numpy>=2; python_version>='3.9'", 5 | "oldest-supported-numpy; python_version<'3.9'", 6 | "cython", 7 | ] 8 | build-backend = "setuptools.build_meta" 9 | -------------------------------------------------------------------------------- /pyworld/__init__.py: -------------------------------------------------------------------------------- 1 | """PyWorld is a Python wrapper for WORLD vocoder. 2 | 3 | PyWorld wrappers WORLD, which is a free software for high-quality speech 4 | analysis, manipulation and synthesis. It can estimate fundamental frequency (F0), 5 | aperiodicity and spectral envelope and also generate the speech like input speech 6 | with only estimated parameters. 7 | 8 | For more information, see https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder 9 | """ 10 | 11 | from __future__ import division, print_function, absolute_import 12 | 13 | import pkg_resources 14 | 15 | __version__ = pkg_resources.get_distribution('pyworld').version 16 | 17 | from .pyworld import * 18 | -------------------------------------------------------------------------------- /pyworld/pyworld.pyx: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import cython 3 | 4 | import numpy as np 5 | cimport numpy as np 6 | np.import_array() 7 | 8 | 9 | cdef extern from "world/synthesis.h": 10 | void Synthesis(const double *f0, 11 | int f0_length, const double * const *spectrogram, 12 | const double * const *aperiodicity, 13 | int fft_size, double frame_period, 14 | int fs, int y_length, double *y) except + nogil 15 | 16 | 17 | cdef extern from "world/cheaptrick.h": 18 | ctypedef struct CheapTrickOption: 19 | double q1 20 | double f0_floor 21 | int fft_size 22 | 23 | int GetFFTSizeForCheapTrick(int fs, const CheapTrickOption *option) except + 24 | double GetF0FloorForCheapTrick(int fs, int fft_size) except + 25 | void InitializeCheapTrickOption(int fs, CheapTrickOption *option) except + 26 | void CheapTrick(const double *x, int x_length, int fs, const double *temporal_positions, 27 | const double *f0, int f0_length, const CheapTrickOption *option, 28 | double **spectrogram) except + nogil 29 | 30 | 31 | cdef extern from "world/dio.h": 32 | ctypedef struct DioOption: 33 | double f0_floor 34 | double f0_ceil 35 | double channels_in_octave 36 | double frame_period 37 | int speed 38 | double allowed_range 39 | 40 | void InitializeDioOption(DioOption *option) except + 41 | int GetSamplesForDIO(int fs, int x_length, double frame_period) 42 | void Dio(const double *x, int x_length, int fs, const DioOption *option, 43 | double *temporal_positions, double *f0) except + nogil 44 | 45 | 46 | cdef extern from "world/harvest.h": 47 | ctypedef struct HarvestOption: 48 | double f0_floor 49 | double f0_ceil 50 | double frame_period 51 | 52 | void InitializeHarvestOption(HarvestOption *option) 53 | int GetSamplesForHarvest(int fs, int x_length, double frame_period) 54 | void Harvest(const double *x, int x_length, int fs, const HarvestOption *option, 55 | double *temporal_positions, double *f0) except + nogil 56 | 57 | 58 | cdef extern from "world/d4c.h": 59 | ctypedef struct D4COption: 60 | double threshold 61 | 62 | void InitializeD4COption(D4COption *option) except + 63 | void D4C(const double *x, int x_length, int fs, const double *temporal_positions, 64 | const double *f0, int f0_length, int fft_size, const D4COption *option, 65 | double **aperiodicity) except + nogil 66 | 67 | 68 | cdef extern from "world/stonemask.h": 69 | void StoneMask(const double *x, int x_length, int fs, 70 | const double *temporal_positions, const double *f0, int f0_length, 71 | double *refined_f0) except + nogil 72 | 73 | 74 | cdef extern from "world/codec.h": 75 | int GetNumberOfAperiodicities(int fs) 76 | void CodeAperiodicity(const double * const *aperiodicity, int f0_length, 77 | int fs, int fft_size, double **coded_aperiodicity) except + 78 | void DecodeAperiodicity(const double * const *coded_aperiodicity, 79 | int f0_length, int fs, int fft_size, double **aperiodicity) except + 80 | void CodeSpectralEnvelope(const double * const *spectrogram, int f0_length, 81 | int fs, int fft_size, int number_of_dimensions, 82 | double **coded_spectral_envelope) except + 83 | void DecodeSpectralEnvelope(const double * const *coded_spectral_envelope, 84 | int f0_length, int fs, int fft_size, int number_of_dimensions, 85 | double **spectrogram) except + 86 | 87 | 88 | default_frame_period = 5.0 89 | default_f0_floor = 71.0 90 | default_f0_ceil = 800.0 91 | 92 | 93 | def dio(np.ndarray[double, ndim=1, mode="c"] x not None, int fs, 94 | f0_floor=default_f0_floor, f0_ceil=default_f0_ceil, 95 | channels_in_octave=2.0, frame_period=default_frame_period, 96 | speed=1, allowed_range=0.1): 97 | """DIO F0 extraction algorithm. 98 | 99 | Parameters 100 | ---------- 101 | x : ndarray 102 | Input waveform signal. 103 | fs : int 104 | Sample rate of input signal in Hz. 105 | f0_floor : float 106 | Lower F0 limit in Hz. 107 | Default: 71.0 108 | f0_ceil : float 109 | Upper F0 limit in Hz. 110 | Default: 800.0 111 | channels_in_octave : float 112 | Resolution of multiband processing; normally shouldn't be changed. 113 | Default: 2.0 114 | frame_period : float 115 | Period between consecutive frames in milliseconds. 116 | Default: 5.0 117 | speed : int 118 | The F0 estimator may downsample the input signal using this integer factor 119 | (range [1;12]). The algorithm will then operate on a signal at fs/speed Hz 120 | to reduce computational complexity, but high values may negatively impact 121 | accuracy. 122 | Default: 1 (no downsampling) 123 | allowed_range : float 124 | Threshold for voiced/unvoiced decision. Can be any value >= 0, but 0.02 to 0.2 125 | is a reasonable range. Lower values will cause more frames to be considered 126 | unvoiced (in the extreme case of `threshold=0`, almost all frames will be unvoiced). 127 | Default: 0.1 128 | 129 | Returns 130 | ------- 131 | f0 : ndarray 132 | Estimated F0 contour. 133 | temporal_positions : ndarray 134 | Temporal position of each frame. 135 | """ 136 | cdef int x_length = len(x) 137 | cdef DioOption option 138 | InitializeDioOption(&option) 139 | option.channels_in_octave = channels_in_octave 140 | option.f0_floor = f0_floor 141 | option.f0_ceil = f0_ceil 142 | option.frame_period = frame_period 143 | option.speed = speed 144 | option.allowed_range = allowed_range 145 | f0_length = GetSamplesForDIO(fs, x_length, option.frame_period) 146 | cdef np.ndarray[double, ndim=1, mode="c"] f0 = \ 147 | np.zeros(f0_length, dtype=np.dtype('float64')) 148 | cdef np.ndarray[double, ndim=1, mode="c"] temporal_positions = \ 149 | np.zeros(f0_length, dtype=np.dtype('float64')) 150 | with (nogil, cython.boundscheck(False)): 151 | Dio(&x[0], x_length, fs, &option, &temporal_positions[0], &f0[0]) 152 | return f0, temporal_positions 153 | 154 | 155 | def harvest(np.ndarray[double, ndim=1, mode="c"] x not None, int fs, 156 | f0_floor=default_f0_floor, f0_ceil=default_f0_ceil, 157 | frame_period=default_frame_period): 158 | """Harvest F0 extraction algorithm. 159 | 160 | Parameters 161 | ---------- 162 | x : ndarray 163 | Input waveform signal. 164 | fs : int 165 | Sample rate of input signal in Hz. 166 | f0_floor : float 167 | Lower F0 limit in Hz. 168 | Default: 71.0 169 | f0_ceil : float 170 | Upper F0 limit in Hz. 171 | Default: 800.0 172 | frame_period : float 173 | Period between consecutive frames in milliseconds. 174 | Default: 5.0 175 | 176 | Returns 177 | ------- 178 | f0 : ndarray 179 | Estimated F0 contour. 180 | temporal_positions : ndarray 181 | Temporal position of each frame. 182 | """ 183 | cdef int x_length = len(x) 184 | cdef HarvestOption option 185 | InitializeHarvestOption(&option) 186 | option.f0_floor = f0_floor 187 | option.f0_ceil = f0_ceil 188 | option.frame_period = frame_period 189 | f0_length = GetSamplesForHarvest(fs, x_length, option.frame_period) 190 | cdef np.ndarray[double, ndim=1, mode="c"] f0 = \ 191 | np.zeros(f0_length, dtype=np.dtype('float64')) 192 | cdef np.ndarray[double, ndim=1, mode="c"] temporal_positions = \ 193 | np.zeros(f0_length, dtype=np.dtype('float64')) 194 | with (nogil, cython.boundscheck(False)): 195 | Harvest(&x[0], x_length, fs, &option, &temporal_positions[0], &f0[0]) 196 | return f0, temporal_positions 197 | 198 | 199 | def stonemask(np.ndarray[double, ndim=1, mode="c"] x not None, 200 | np.ndarray[double, ndim=1, mode="c"] f0 not None, 201 | np.ndarray[double, ndim=1, mode="c"] temporal_positions not None, 202 | int fs): 203 | """StoneMask F0 refinement algorithm. 204 | 205 | Parameters 206 | ---------- 207 | x : ndarray 208 | Input waveform signal. 209 | f0 : ndarray 210 | Input F0 contour. 211 | temporal_positions : ndarray 212 | Temporal positions of each frame. 213 | fs : int 214 | Sample rate of input signal in Hz. 215 | 216 | Returns 217 | ------- 218 | refined_f0 : ndarray 219 | Refined F0 contour. 220 | """ 221 | cdef int x_length = len(x) 222 | cdef int f0_length = len(f0) 223 | cdef np.ndarray[double, ndim=1, mode="c"] refined_f0 = \ 224 | np.zeros(f0_length, dtype=np.dtype('float64')) 225 | with (nogil, cython.boundscheck(False)): 226 | StoneMask(&x[0], x_length, fs, &temporal_positions[0], 227 | &f0[0], f0_length, &refined_f0[0]) 228 | return refined_f0 229 | 230 | 231 | def get_cheaptrick_fft_size(int fs, f0_floor=default_f0_floor): 232 | """Calculate suitable FFT size for CheapTrick given F0 floor. 233 | 234 | Parameters 235 | ---------- 236 | fs : int 237 | Sample rate of input signal in Hz. 238 | f0_floor : float 239 | Lower F0 limit in Hz. The required FFT size is a direct 240 | consequence of the F0 floor used. 241 | Default: 71.0 242 | 243 | Returns 244 | ------- 245 | fft_size : int 246 | Resulting FFT size. 247 | """ 248 | cdef CheapTrickOption option 249 | option.f0_floor = f0_floor 250 | cdef int fft_size = GetFFTSizeForCheapTrick(fs, &option) 251 | return fft_size 252 | 253 | def get_cheaptrick_f0_floor(int fs, int fft_size): 254 | """Calculates actual lower F0 limit for CheapTrick 255 | based on the sampling frequency and FFT size used. Whenever F0 is below 256 | this threshold the spectrum will be analyzed as if the frame is unvoiced 257 | (using kDefaultF0 defined in constantnumbers.h). 258 | 259 | Parameters 260 | ---------- 261 | fs : int 262 | Sample rate of input signal in Hz. 263 | fft_size : int 264 | FFT size used for CheapTrick. 265 | 266 | Returns 267 | ------- 268 | f0_floor : float 269 | Resulting lower F0 limit in Hz. 270 | """ 271 | cdef double f0_floor = GetF0FloorForCheapTrick(fs, fft_size) 272 | return f0_floor 273 | 274 | def cheaptrick(np.ndarray[double, ndim=1, mode="c"] x not None, 275 | np.ndarray[double, ndim=1, mode="c"] f0 not None, 276 | np.ndarray[double, ndim=1, mode="c"] temporal_positions not None, 277 | int fs, 278 | q1=-0.15, f0_floor=default_f0_floor, fft_size=None): 279 | """CheapTrick harmonic spectral envelope estimation algorithm. 280 | 281 | Parameters 282 | ---------- 283 | x : ndarray 284 | Input waveform signal. 285 | f0 : ndarray 286 | Input F0 contour. 287 | temporal_positions : ndarray 288 | Temporal positions of each frame. 289 | fs : int 290 | Sample rate of input signal in Hz. 291 | q1 : float 292 | Spectral recovery parameter. 293 | Default: -0.15 (this value was tuned and normally does not need adjustment) 294 | f0_floor : float, None 295 | Lower F0 limit in Hz. Not used in case `fft_size` is specified. 296 | Default: 71.0 297 | fft_size : int, None 298 | FFT size to be used. When `None` (default) is used, the FFT size is computed 299 | automatically as a function of the given input sample rate and F0 floor. 300 | When `fft_size` is specified, the given `f0_floor` parameter is ignored. 301 | Default: None 302 | 303 | Returns 304 | ------- 305 | spectrogram : ndarray 306 | Spectral envelope (squared magnitude). 307 | """ 308 | cdef CheapTrickOption option 309 | InitializeCheapTrickOption(fs, &option) 310 | option.q1 = q1 311 | if fft_size is None: 312 | option.f0_floor = f0_floor # CheapTrickOption.f0_floor is only used in GetFFTSizeForCheapTrick() 313 | option.fft_size = GetFFTSizeForCheapTrick(fs, &option) 314 | else: 315 | option.fft_size = fft_size 316 | # the f0_floor used by CheapTrick() will be re-compute from this given fft_size 317 | cdef int x_length = len(x) 318 | cdef int f0_length = len(f0) 319 | 320 | cdef double[:, ::1] spectrogram = np.zeros((f0_length, option.fft_size//2 + 1), 321 | dtype=np.dtype('float64')) 322 | cdef np.intp_t[:] tmp = np.zeros(f0_length, dtype=np.intp) 323 | cdef double **cpp_spectrogram = ( &tmp[0]) 324 | cdef np.intp_t i 325 | with (nogil, cython.boundscheck(False)): 326 | for i in range(f0_length): 327 | cpp_spectrogram[i] = &spectrogram[i, 0] 328 | 329 | CheapTrick(&x[0], x_length, fs, &temporal_positions[0], 330 | &f0[0], f0_length, &option, cpp_spectrogram) 331 | return np.array(spectrogram, dtype=np.float64) 332 | 333 | 334 | def d4c(np.ndarray[double, ndim=1, mode="c"] x not None, 335 | np.ndarray[double, ndim=1, mode="c"] f0 not None, 336 | np.ndarray[double, ndim=1, mode="c"] temporal_positions not None, 337 | int fs, 338 | threshold=0.85, fft_size=None): 339 | """D4C aperiodicity estimation algorithm. 340 | 341 | Parameters 342 | ---------- 343 | x : ndarray 344 | Input waveform signal. 345 | f0 : ndarray 346 | Input F0 contour. 347 | temporal_positions : ndarray 348 | Temporal positions of each frame. 349 | fs : int 350 | Sample rate of input signal in Hz. 351 | q1 : float 352 | Spectral recovery parameter. 353 | Default: -0.15 (this value was tuned and normally does not need adjustment) 354 | threshold : float 355 | Threshold for aperiodicity-based voiced/unvoiced decision, in range 0 to 1. 356 | If a value of 0 is used, voiced frames will be kept voiced. If a value > 0 is 357 | used some voiced frames can be considered unvoiced by setting their aperiodicity 358 | to 1 (thus synthesizing them with white noise). Using `threshold=0` will result 359 | in the behavior of older versions of D4C. The current default of 0.85 is meant 360 | to be used in combination with the Harvest F0 estimator, which was designed to have 361 | a high voiced/unvoiced threshold (i.e. most frames will be considered voiced). 362 | Default: 0.85 363 | fft_size : int, None 364 | FFT size to be used. When `None` (default) is used, the FFT size is computed 365 | automatically as a function of the given input sample rate and the default F0 floor. 366 | When `fft_size` is specified, it should match the FFT size used to compute 367 | the spectral envelope (i.e. `fft_size=2*(sp.shape[1] - 1)`) in order to get the 368 | desired results when resynthesizing. 369 | Default: None 370 | 371 | Returns 372 | ------- 373 | aperiodicity : ndarray 374 | Aperiodicity (envelope, linear magnitude relative to spectral envelope). 375 | """ 376 | cdef int x_length = len(x) 377 | cdef int f0_length = len(f0) 378 | cdef int fft_size0 379 | if fft_size is None: 380 | fft_size0 = get_cheaptrick_fft_size(fs, default_f0_floor) 381 | else: 382 | fft_size0 = fft_size 383 | 384 | cdef D4COption option 385 | InitializeD4COption(&option) 386 | option.threshold = threshold 387 | 388 | cdef double[:, ::1] aperiodicity = np.zeros((f0_length, fft_size0//2 + 1), 389 | dtype=np.dtype('float64')) 390 | cdef np.intp_t[:] tmp = np.zeros(f0_length, dtype=np.intp) 391 | cdef double **cpp_aperiodicity = ( &tmp[0]) 392 | cdef np.intp_t i 393 | with (nogil, cython.boundscheck(False)): 394 | for i in range(f0_length): 395 | cpp_aperiodicity[i] = &aperiodicity[i, 0] 396 | 397 | D4C(&x[0], x_length, fs, &temporal_positions[0], 398 | &f0[0], f0_length, fft_size0, &option, 399 | cpp_aperiodicity) 400 | return np.array(aperiodicity, dtype=np.float64) 401 | 402 | 403 | def synthesize(np.ndarray[double, ndim=1, mode="c"] f0 not None, 404 | np.ndarray[double, ndim=2, mode="c"] spectrogram not None, 405 | np.ndarray[double, ndim=2, mode="c"] aperiodicity not None, 406 | int fs, 407 | double frame_period=default_frame_period): 408 | """WORLD synthesis from parametric representation. 409 | 410 | Parameters 411 | ---------- 412 | f0 : ndarray 413 | Input F0 contour. 414 | spectrogram : ndarray 415 | Spectral envelope. 416 | aperiodicity : ndarray 417 | Aperodicity envelope. 418 | fs : int 419 | Sample rate of input signal in Hz. 420 | frame_period : float 421 | Period between consecutive frames in milliseconds. 422 | Default: 5.0 423 | 424 | Returns 425 | ------- 426 | y : ndarray 427 | Output waveform signal. 428 | """ 429 | if (f0.shape[0] != spectrogram.shape[0] or 430 | f0.shape[0] != aperiodicity.shape[0]): 431 | raise ValueError('Mismatched number of frames between F0 ({:d}), ' 432 | 'spectrogram ({:d}) and aperiodicty ({:d})' 433 | .format(f0.shape[0], spectrogram.shape[0], 434 | aperiodicity.shape[0])) 435 | if spectrogram.shape[1] != aperiodicity.shape[1]: 436 | raise ValueError('Mismatched dimensionality (spec size) between ' 437 | 'spectrogram ({:d}) and aperiodicity ({:d})' 438 | .format(spectrogram.shape[1], aperiodicity.shape[1])) 439 | 440 | cdef int f0_length = len(f0) 441 | cdef int y_length = (f0_length * frame_period * fs / 1000) 442 | cdef int fft_size = (spectrogram.shape[1] - 1)*2 443 | cdef np.ndarray[double, ndim=1, mode="c"] y = \ 444 | np.zeros(y_length, dtype=np.dtype('float64')) 445 | 446 | cdef double[:, ::1] spectrogram0 = spectrogram 447 | cdef double[:, ::1] aperiodicity0 = aperiodicity 448 | cdef np.intp_t[:] tmp = np.zeros(f0_length, dtype=np.intp) 449 | cdef np.intp_t[:] tmp2 = np.zeros(f0_length, dtype=np.intp) 450 | cdef double **cpp_spectrogram = ( &tmp[0]) 451 | cdef double **cpp_aperiodicity = ( &tmp2[0]) 452 | cdef np.intp_t i 453 | with (nogil, cython.boundscheck(False)): 454 | for i in range(f0_length): 455 | cpp_spectrogram[i] = &spectrogram0[i, 0] 456 | cpp_aperiodicity[i] = &aperiodicity0[i, 0] 457 | 458 | Synthesis(&f0[0], f0_length, cpp_spectrogram, 459 | cpp_aperiodicity, fft_size, frame_period, fs, y_length, &y[0]) 460 | return y 461 | 462 | 463 | def get_num_aperiodicities(fs): 464 | """Calculate the required dimensionality to code D4C aperiodicity. 465 | 466 | Parameters 467 | ---------- 468 | fs : int 469 | Sample rate of input signal in Hz. 470 | 471 | Returns 472 | ------- 473 | n_aper : int 474 | Required number of coefficients. 475 | """ 476 | cdef int n_aper = GetNumberOfAperiodicities(fs) 477 | return n_aper 478 | 479 | def code_aperiodicity(np.ndarray[double, ndim=2, mode="c"] aperiodicity, fs): 480 | """Reduce dimensionality of D4C aperiodicity. 481 | 482 | Parameters 483 | ---------- 484 | aperiodicity : ndarray 485 | Aperodicity envelope. 486 | fs : int 487 | Sample rate of input signal in Hz. 488 | 489 | Returns 490 | ------- 491 | coded_aperiodicity : ndarray 492 | Coded aperiodicity envelope. 493 | """ 494 | cdef int ap_length = len(aperiodicity) 495 | cdef int fft_size = (aperiodicity.shape[1] - 1)*2 496 | cdef int n_coded_aper = get_num_aperiodicities(fs) 497 | 498 | cdef double[:, ::1] aper = aperiodicity 499 | cdef double[:, ::1] coded_aper = np.zeros((ap_length, n_coded_aper), 500 | dtype=np.dtype('float64')) 501 | cdef np.intp_t[:] tmp1 = np.zeros(ap_length, dtype=np.intp) 502 | cdef np.intp_t[:] tmp2 = np.zeros(ap_length, dtype=np.intp) 503 | cdef double **cpp_aper = ( &tmp1[0]) 504 | cdef double **cpp_coded_aper = ( &tmp2[0]) 505 | cdef np.intp_t i 506 | for i in range(ap_length): 507 | cpp_aper[i] = &aper[i, 0] 508 | cpp_coded_aper[i] = &coded_aper[i, 0] 509 | 510 | CodeAperiodicity(cpp_aper, ap_length, fs, 511 | fft_size, cpp_coded_aper) 512 | 513 | return np.array(coded_aper, dtype=np.float64) 514 | 515 | def decode_aperiodicity(np.ndarray[double, ndim=2, mode="c"] coded_aperiodicity, 516 | fs, fft_size): 517 | """Restore full dimensionality of coded D4C aperiodicity. 518 | 519 | Parameters 520 | ---------- 521 | coded_aperiodicity : ndarray 522 | Coded aperodicity envelope. 523 | fs : int 524 | Sample rate of input signal in Hz. 525 | fft_size : int 526 | FFT size corresponding to the full dimensional aperiodicity. 527 | 528 | Returns 529 | ------- 530 | aperiodicity : ndarray 531 | Aperiodicity envelope. 532 | """ 533 | cdef int ap_length = len(coded_aperiodicity) 534 | cdef int n_coded_aper = get_num_aperiodicities(fs) 535 | if n_coded_aper != coded_aperiodicity.shape[1]: 536 | raise ValueError('Invalid aperiodicity code dimensionality ' 537 | '(was: {:d}, expected: {:d})' 538 | .format(coded_aperiodicity.shape[1], n_coded_aper)) 539 | 540 | cdef double[:, ::1] coded_aper = coded_aperiodicity 541 | cdef double[:, ::1] aper = np.zeros((ap_length, fft_size//2 + 1), 542 | dtype=np.dtype('float64')) 543 | cdef np.intp_t[:] tmp1 = np.zeros(ap_length, dtype=np.intp) 544 | cdef np.intp_t[:] tmp2 = np.zeros(ap_length, dtype=np.intp) 545 | cdef double **cpp_coded_aper = ( &tmp1[0]) 546 | cdef double **cpp_aper = ( &tmp2[0]) 547 | cdef np.intp_t i 548 | for i in range(ap_length): 549 | cpp_coded_aper[i] = &coded_aper[i, 0] 550 | cpp_aper[i] = &aper[i, 0] 551 | 552 | DecodeAperiodicity(cpp_coded_aper, ap_length, fs, fft_size, cpp_aper) 553 | 554 | return np.array(aper, dtype=np.float64) 555 | 556 | def code_spectral_envelope(np.ndarray[double, ndim=2, mode="c"] spectrogram, fs, 557 | number_of_dimensions): 558 | """Reduce dimensionality of spectral envelope. 559 | 560 | Parameters 561 | ---------- 562 | spectrogram : ndarray 563 | Spectral envelope. 564 | fs : int 565 | Sample rate of input signal in Hz. 566 | number_of_dimensions : int 567 | Number of dimentions of coded spectral envelope 568 | 569 | Returns 570 | ------- 571 | coded_spectral_envelope : ndarray 572 | Coded spectral envelope. 573 | """ 574 | cdef int sp_length = len(spectrogram) 575 | cdef int fft_size = (spectrogram.shape[1] - 1)*2 576 | 577 | cdef double[:, ::1] sp = spectrogram 578 | cdef double[:, ::1] coded_sp = np.zeros((sp_length, number_of_dimensions), 579 | dtype=np.dtype('float64')) 580 | cdef np.intp_t[:] tmp1 = np.zeros(sp_length, dtype=np.intp) 581 | cdef np.intp_t[:] tmp2 = np.zeros(sp_length, dtype=np.intp) 582 | cdef double **cpp_sp = ( &tmp1[0]) 583 | cdef double **cpp_coded_sp = ( &tmp2[0]) 584 | cdef np.intp_t i 585 | for i in range(sp_length): 586 | cpp_sp[i] = &sp[i, 0] 587 | cpp_coded_sp[i] = &coded_sp[i, 0] 588 | 589 | CodeSpectralEnvelope(cpp_sp, sp_length, fs, fft_size, 590 | number_of_dimensions, cpp_coded_sp) 591 | 592 | return np.array(coded_sp, dtype=np.float64) 593 | 594 | def decode_spectral_envelope(np.ndarray[double, ndim=2, mode="c"] coded_spectral_envelope, 595 | fs, fft_size): 596 | """Restore full dimensionality of coded spectral envelope. 597 | 598 | Parameters 599 | ---------- 600 | coded_spectral_envelope : ndarray 601 | Coded spectral envelope. 602 | fs : int 603 | Sample rate of input signal in Hz. 604 | fft_size : int 605 | FFT size corresponding to the full dimensional spectral envelope. 606 | 607 | Returns 608 | ------- 609 | spectrogram : ndarray 610 | Spectral envelope. 611 | """ 612 | cdef int sp_length = len(coded_spectral_envelope) 613 | cdef int number_of_dimensions = len(coded_spectral_envelope[0]) 614 | cdef double[:, ::1] coded_sp = coded_spectral_envelope 615 | cdef double[:, ::1] sp = np.zeros((sp_length, fft_size//2 + 1), 616 | dtype=np.dtype('float64')) 617 | cdef np.intp_t[:] tmp1 = np.zeros(sp_length, dtype=np.intp) 618 | cdef np.intp_t[:] tmp2 = np.zeros(sp_length, dtype=np.intp) 619 | cdef double **cpp_coded_sp = ( &tmp1[0]) 620 | cdef double **cpp_sp = ( &tmp2[0]) 621 | cdef np.intp_t i 622 | for i in range(sp_length): 623 | cpp_coded_sp[i] = &coded_sp[i, 0] 624 | cpp_sp[i] = &sp[i, 0] 625 | 626 | DecodeSpectralEnvelope(cpp_coded_sp, sp_length, fs, fft_size, 627 | number_of_dimensions, cpp_sp) 628 | 629 | return np.array(sp, dtype=np.float64) 630 | 631 | def wav2world(x, fs, fft_size=None, frame_period=default_frame_period): 632 | """Convenience function to do all WORLD analysis steps in a single call. 633 | 634 | In this case only `frame_period` can be configured and other parameters 635 | are fixed to their defaults. Likewise, F0 estimation is fixed to 636 | DIO plus StoneMask refinement. 637 | 638 | Parameters 639 | ---------- 640 | x : ndarray 641 | Input waveform signal. 642 | fs : int 643 | Sample rate of input signal in Hz. 644 | fft_size : int 645 | Length of Fast Fourier Transform (in number of samples) 646 | The resulting dimension of `ap` adn `sp` will be `fft_size` // 2 + 1 647 | frame_period : float 648 | Period between consecutive frames in milliseconds. 649 | Default: 5.0 650 | 651 | Returns 652 | ------- 653 | f0 : ndarray 654 | F0 contour. 655 | sp : ndarray 656 | Spectral envelope. 657 | ap : ndarray 658 | Aperiodicity. 659 | """ 660 | _f0, t = dio(x, fs, frame_period=frame_period) 661 | f0 = stonemask(x, _f0, t, fs) 662 | sp = cheaptrick(x, f0, t, fs, fft_size=fft_size) 663 | ap = d4c(x, f0, t, fs, fft_size=fft_size) 664 | return f0, sp, ap 665 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | argparse; python_version<"3.5" 4 | soundfile 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, with_statement 2 | 3 | import sys 4 | from glob import glob 5 | from os.path import join 6 | 7 | import numpy 8 | from setuptools import Extension, find_packages, setup 9 | from setuptools.command.build_ext import build_ext 10 | 11 | 12 | _VERSION = '0.3.4' 13 | 14 | 15 | world_src_top = join("lib", "World", "src") 16 | world_sources = glob(join(world_src_top, "*.cpp")) 17 | 18 | ext_modules = [ 19 | Extension( 20 | name="pyworld.pyworld", 21 | include_dirs=[world_src_top, numpy.get_include()], 22 | sources=[join("pyworld", "pyworld.pyx")] + world_sources, 23 | language="c++")] 24 | 25 | kwargs = {"encoding": "utf-8"} if int(sys.version[0]) > 2 else {} 26 | setup( 27 | name="pyworld", 28 | description="PyWorld: a Python wrapper for WORLD vocoder", 29 | long_description=open("README.md", "r", **kwargs).read(), 30 | long_description_content_type="text/markdown", 31 | ext_modules=ext_modules, 32 | cmdclass={'build_ext': build_ext}, 33 | version=_VERSION, 34 | packages=find_packages(), 35 | install_requires=['numpy'], 36 | extras_require={ 37 | 'test': ['nose'], 38 | 'sdist': ['numpy', 'cython>=0.24'], 39 | }, 40 | author="Pyworld Contributors", 41 | author_email="jeremycchsu@gmail.com", 42 | url="https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder", 43 | keywords=['vocoder'], 44 | classifiers=[], 45 | ) 46 | --------------------------------------------------------------------------------