├── .gitignore
├── .gitmodules
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── appveyor.yml
├── demo
├── demo.py
└── utterance
│ └── vaiueo2d.wav
├── pyproject.toml
├── pyworld
├── __init__.py
└── pyworld.pyx
├── requirements.txt
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | test/
2 | *.cpp
3 | *.pyd
4 | .pypirc
5 | .vscode
6 |
7 | # Created by https://www.gitignore.io
8 |
9 | ### Python ###
10 | # Byte-compiled / optimized / DLL files
11 | __pycache__/
12 | *.py[cod]
13 |
14 | # C extensions
15 | *.so
16 |
17 | # Distribution / packaging
18 | .Python
19 | env/
20 | build/
21 | develop-eggs/
22 | dist/
23 | downloads/
24 | eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .coverage
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 |
59 | # Sphinx documentation
60 | docs/_build/
61 |
62 | # PyBuilder
63 | target/
64 |
65 |
66 | ### IPythonNotebook ###
67 | # Temporary data
68 | .ipynb_checkpoints/
69 |
70 |
71 | ### SublimeText ###
72 | # cache files for sublime text
73 | *.tmlanguage.cache
74 | *.tmPreferences.cache
75 | *.stTheme.cache
76 |
77 | # workspace files are user-specific
78 | *.sublime-workspace
79 |
80 | # project files should be checked into the repository, unless a significant
81 | # proportion of contributors will probably not be using SublimeText
82 | # *.sublime-project
83 |
84 | # sftp configuration file
85 | sftp-config.json
86 |
87 |
88 | ### Emacs ###
89 | # -*- mode: gitignore; -*-
90 | *~
91 | \#*\#
92 | /.emacs.desktop
93 | /.emacs.desktop.lock
94 | *.elc
95 | auto-save-list
96 | tramp
97 | .\#*
98 |
99 | # Org-mode
100 | .org-id-locations
101 | *_archive
102 |
103 | # flymake-mode
104 | *_flymake.*
105 |
106 | # eshell files
107 | /eshell/history
108 | /eshell/lastdir
109 |
110 | # elpa packages
111 | /elpa/
112 |
113 | # reftex files
114 | *.rel
115 |
116 | # AUCTeX auto folder
117 | /auto/
118 |
119 | # cask packages
120 | .cask/
121 |
122 |
123 | ### Vim ###
124 | [._]*.s[a-w][a-z]
125 | [._]s[a-w][a-z]
126 | *.un~
127 | Session.vim
128 | .netrwhist
129 | *~
130 |
131 |
132 | ### C++ ###
133 | # Compiled Object files
134 | *.slo
135 | *.lo
136 | *.o
137 | *.obj
138 |
139 | # Precompiled Headers
140 | *.gch
141 | *.pch
142 |
143 | # Compiled Dynamic libraries
144 | *.so
145 | *.dylib
146 | *.dll
147 |
148 | # Fortran module files
149 | *.mod
150 |
151 | # Compiled Static libraries
152 | *.lai
153 | *.la
154 | *.a
155 | *.lib
156 |
157 | # Executables
158 | *.exe
159 | *.out
160 | *.app
161 |
162 |
163 | ### OSX ###
164 | .DS_Store
165 | .AppleDouble
166 | .LSOverride
167 |
168 | # Icon must end with two \r
169 | Icon
170 |
171 |
172 | # Thumbnails
173 | ._*
174 |
175 | # Files that might appear on external disk
176 | .Spotlight-V100
177 | .Trashes
178 |
179 | # Directories potentially created on remote AFP share
180 | .AppleDB
181 | .AppleDesktop
182 | Network Trash Folder
183 | Temporary Items
184 | .apdisk
185 |
186 |
187 | ### Linux ###
188 | *~
189 |
190 | # KDE directory preferences
191 | .directory
192 |
193 | # Linux trash folder which might appear on any partition or disk
194 | .Trash-*
195 | test/
196 | *.cpp
197 | *.pyd
198 | .pypirc
199 | .vscode
200 |
201 | # Created by https://www.gitignore.io
202 |
203 | ### Python ###
204 | # Byte-compiled / optimized / DLL files
205 | __pycache__/
206 | *.py[cod]
207 |
208 | # C extensions
209 | *.so
210 |
211 | # Distribution / packaging
212 | .Python
213 | env/
214 | build/
215 | develop-eggs/
216 | dist/
217 | downloads/
218 | eggs/
219 | lib/
220 | lib64/
221 | parts/
222 | sdist/
223 | var/
224 | *.egg-info/
225 | .installed.cfg
226 | *.egg
227 |
228 | # PyInstaller
229 | # Usually these files are written by a python script from a template
230 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
231 | *.manifest
232 | *.spec
233 |
234 | # Installer logs
235 | pip-log.txt
236 | pip-delete-this-directory.txt
237 |
238 | # Unit test / coverage reports
239 | htmlcov/
240 | .tox/
241 | .coverage
242 | .cache
243 | nosetests.xml
244 | coverage.xml
245 |
246 | # Translations
247 | *.mo
248 | *.pot
249 |
250 | # Django stuff:
251 | *.log
252 |
253 | # Sphinx documentation
254 | docs/_build/
255 |
256 | # PyBuilder
257 | target/
258 |
259 |
260 | ### IPythonNotebook ###
261 | # Temporary data
262 | .ipynb_checkpoints/
263 |
264 |
265 | ### SublimeText ###
266 | # cache files for sublime text
267 | *.tmlanguage.cache
268 | *.tmPreferences.cache
269 | *.stTheme.cache
270 |
271 | # workspace files are user-specific
272 | *.sublime-workspace
273 |
274 | # project files should be checked into the repository, unless a significant
275 | # proportion of contributors will probably not be using SublimeText
276 | # *.sublime-project
277 |
278 | # sftp configuration file
279 | sftp-config.json
280 |
281 |
282 | ### Emacs ###
283 | # -*- mode: gitignore; -*-
284 | *~
285 | \#*\#
286 | /.emacs.desktop
287 | /.emacs.desktop.lock
288 | *.elc
289 | auto-save-list
290 | tramp
291 | .\#*
292 |
293 | # Org-mode
294 | .org-id-locations
295 | *_archive
296 |
297 | # flymake-mode
298 | *_flymake.*
299 |
300 | # eshell files
301 | /eshell/history
302 | /eshell/lastdir
303 |
304 | # elpa packages
305 | /elpa/
306 |
307 | # reftex files
308 | *.rel
309 |
310 | # AUCTeX auto folder
311 | /auto/
312 |
313 | # cask packages
314 | .cask/
315 |
316 |
317 | ### Vim ###
318 | [._]*.s[a-w][a-z]
319 | [._]s[a-w][a-z]
320 | *.un~
321 | Session.vim
322 | .netrwhist
323 | *~
324 |
325 |
326 | ### C++ ###
327 | # Compiled Object files
328 | *.slo
329 | *.lo
330 | *.o
331 | *.obj
332 |
333 | # Precompiled Headers
334 | *.gch
335 | *.pch
336 |
337 | # Compiled Dynamic libraries
338 | *.so
339 | *.dylib
340 | *.dll
341 |
342 | # Fortran module files
343 | *.mod
344 |
345 | # Compiled Static libraries
346 | *.lai
347 | *.la
348 | *.a
349 | *.lib
350 |
351 | # Executables
352 | *.exe
353 | *.out
354 | *.app
355 |
356 |
357 | ### OSX ###
358 | .DS_Store
359 | .AppleDouble
360 | .LSOverride
361 |
362 | # Icon must end with two \r
363 | Icon
364 |
365 |
366 | # Thumbnails
367 | ._*
368 |
369 | # Files that might appear on external disk
370 | .Spotlight-V100
371 | .Trashes
372 |
373 | # Directories potentially created on remote AFP share
374 | .AppleDB
375 | .AppleDesktop
376 | Network Trash Folder
377 | Temporary Items
378 | .apdisk
379 |
380 |
381 | ### Linux ###
382 | *~
383 |
384 | # KDE directory preferences
385 | .directory
386 |
387 | # Linux trash folder which might appear on any partition or disk
388 | .Trash-*
389 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "lib/World"]
2 | path = lib/World
3 | url = https://github.com/mmorise/World
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language:
2 | - python
3 |
4 | python:
5 | - "2.7"
6 | - "3.5"
7 | - "3.6"
8 | - "3.7"
9 | - "3.8"
10 | - "3.9"
11 | # - "3.10" # this doesn't work
12 |
13 | env:
14 | global:
15 | TWINE_USERNAME: JeremyCCHsu
16 | TWINE_PASSWORD:
17 | secure: K22xeLmYvib1wUQyRJOQ+wjvFVK88HNCsGWVXdCFaIfR9F5QAKMmh+RsyKCS5JdMdZF+RtDDgcfsMOIlscGm/crgwzAN9OijKmj4WZGDPR4BY8O0TZGT2n1tfRJjGdmEwMfftmIfHRjX91iZjCW5k7EAwYvV0d+hAdUrSI1FGOKYPpMVusMYNih3AtPbobpTCwgaKddwCVw1YoCEEgSuXfpyZfQzEirEGViJfXr6EmojVoVYi1oYlw4OhAKg+OVfTxVEVsT37xbPp0sjcADYVWr1kXAWF86fGyon+eSHMc5AewoP8tMNSeqyZuQ23EHx2xuK8Ipjv3C0xy+RmMPBdsi5xhbclElNvBH3OjL623x5JYziU+rsckh/6687vFzLg5twlfRDfwFwKHA4UVdSP0Uei8ebltHJwufyKiKjVMllJpbc9NOKJjSF+n0/ddwaebYbWgeZo6wvOnvuZb1d3TyGUktNfdypINNkd6D/RFd6l3kN4IhvBJqP7bZsSsrFoIdDL6to005Hy1Jb8IuUFihFC5BbWcFqd3BefOvZWmhpkZJRkTEN6pCEf0oNFxtWL6NuxwkbVZVcBgKAHAAz/4rKh6r0IipqfWfxKU3N9Phz998LfNwK2ZkQ32gDYdJ5abTY7v50J6s99U4DzSOaD4c3NoYE/VxyW/G1yo2FclQ=
18 |
# works on 3.10 but requires re-installing numpy
matrix:
  include:
    # Versions are quoted on purpose: a bare 3.10 is parsed by YAML as the
    # float 3.1, which would select the wrong interpreter.
    - python: "3.10"
      dist: jammy
      sudo: true
    - python: "3.11"
      dist: jammy
      sudo: true
28 |
29 | before_install:
30 | - sudo apt-get install -y libsndfile1
31 | - sudo apt-get install -y libssl3
32 |
33 | install:
34 | - pip install -U pip
35 | - pip install -r requirements.txt
36 | - pip install -e .
37 |
38 | script:
39 | - cd demo
40 | - python demo.py
41 | - cd ..
42 |
43 |
44 | # before_deploy:
45 | # - python setup.py bdist_wheel
46 |
47 | deploy:
48 | provider: pypi
49 | user: __token__
50 | password:
51 | secure: WF5VmRL0iTK/QPwlMzURfzNdjdvYbPf9QeiEImyaoofS0nsLOARQM+lPuKnXQdv206DOstrokzsYKfLJtfXflTB6AvVSD4OUQbzzpc+rKtHcg6GN+X27zvaf/8NnxOeaEKEcTvP5hEaGxP3Q7h7q3Q51Cwbl/aa0MmHHvik9l2KEVGvDvviieQzQWGxi2YH9xMl8kwpB62Byy13Fk3Jg0jr4DAO7PzIUjlT59GZ1K3elAFqREcJ64+RZQTMHi3Kikiq1QOU1etAq36iGag1RfW5sd35LS4b0CkCaGVfhrv7RG/oQtHPR+3bAedGKkJAUj+xo0ra+9+L0D8OJ8/MwlHXapd24+WGGMdAcUuKvaahj1D+Bn09XNcw0wLu6fCllPbfTMNFWHbB5SbnFhICW2mXNhl1KJ+IQmGW3mW8VHW+1bF6+XCCMzRVezetG6XLVY9ptDxM3mE3EWpaU102g9NXaWkBb/5FuNj+QO1P86WonpQTQLGMnAx5veVMiUhDEeXi6aMelL8hZnimdYaa/5vOJqo6k1OjzPNEhlrazYaU18OI+/Gtg7iDTdu99yHDMiAeq4kRN5lBfiX0PZrttZoJW0P0u44OO7MrQJ54Lz64KevdxPynh9WGJVWLkNaql3d91jvs0xaQwu1X3gbnhE19aEtMICwEfS53TYxl/fWs=
52 | on:
53 | tags: true
54 | skip_cleanup: true
55 | skip_existing: true
56 | # distributions: "sdist bdist_wheel"
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright 2016 pyworld contributors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md LICENSE
2 | recursive-include lib *.cpp *.h LICENSE.txt
3 | recursive-include pyworld *.cpp *.pxd *.pyx
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyWORLD - A Python wrapper of WORLD Vocoder
2 |
3 |
4 | | **`Linux`** | **`Windows`** |
5 | |-----------------|-----------|
6 | | [Travis CI build status](https://app.travis-ci.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder) | [AppVeyor build status](https://ci.appveyor.com/project/JeremyCCHsu/python-wrapper-for-world-vocoder) |
7 |
8 |
9 |
10 |
11 | WORLD Vocoder is a fast and high-quality vocoder
12 | which parameterizes speech into three components:
13 |
14 | 1. `f0`: Pitch contour
15 | 2. `sp`: Harmonic spectral envelope
16 | 3. `ap`: Aperiodic spectral envelope (relative to the harmonic spectral envelope)
17 |
18 | It can also (re)synthesize speech using these features (see examples below).
19 |
20 | For more information, please visit Dr. Morise's [WORLD repository](https://github.com/mmorise/World)
21 | and the [official website of WORLD Vocoder](http://ml.cs.yamanashi.ac.jp/world/english)
22 |
23 |
24 | ## APIs
25 |
26 | ### Vocoder Functions
27 | ```python
28 | import pyworld as pw
29 | _f0, t = pw.dio(x, fs) # raw pitch extractor
30 | f0 = pw.stonemask(x, _f0, t, fs) # pitch refinement
31 | sp = pw.cheaptrick(x, f0, t, fs) # extract smoothed spectrogram
32 | ap = pw.d4c(x, f0, t, fs) # extract aperiodicity
33 |
34 | y = pw.synthesize(f0, sp, ap, fs) # synthesize an utterance using the parameters
35 | ```
36 |
37 |
38 | ### Utility
39 | ```python
40 | # Convert speech into features (using default arguments)
41 | f0, sp, ap = pw.wav2world(x, fs)
42 | ```
43 |
44 |
45 | You can change the default arguments of the function, too.
46 | See more info using `help`.
47 |
48 |
49 | ## Installation
50 |
51 | ### Using Pip
52 | `pip install pyworld`
53 |
54 |
55 | ### Building from Source
56 | ```bash
57 | git clone https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder.git
58 | cd Python-Wrapper-for-World-Vocoder
59 | git submodule update --init
60 | pip install -U pip
61 | pip install -r requirements.txt
62 | pip install .
63 | ```
64 | It will automatically `git clone` Morise's World Vocoder (C++ version).
65 | (It seems to me that using `virtualenv` or `conda` is the best practice.)
66 |
67 |
68 | ### Installation Validation
69 | You can validate installation by running
70 | ```bash
71 | cd demo
72 | python demo.py
73 | ```
74 | to see if you get results in the `test/` directory.
75 | (Please avoid writing and executing codes in the `Python-Wrapper-for-World-Vocoder` folder for now.)
76 |
77 | ### Environment/Dependencies
78 | - Operating systems
79 | - Linux Ubuntu 14.04+
80 | - Windows (thanks to [wuaalb](https://github.com/wuaalb))
81 | - WSL
82 | - Python
83 | - 3.7+
84 |
85 | You can install these dependencies with `pip install -r requirements.txt`
86 |
87 |
88 |
89 | ## Notice
90 | - WORLD vocoder is designed for speech sampled ≥ 16 kHz.
91 | Applying WORLD to 8 kHz speech will fail.
92 | See a possible workaround [here](https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/issues/54).
93 | - When the SNR is low, extracting pitch using `harvest` instead of `dio`
94 | is a better option.
95 |
96 |
97 | ## Troubleshooting
98 | 1. Upgrade your Cython version to 0.24.
99 | (I failed to build it on Cython 0.20.1post0)
100 | It'll require you to download Cython from http://cython.org/
101 | Unzip it, and `python setup.py install` it.
102 | (I tried `pip install Cython` but the upgrade didn't seem correct)
103 | (Again, add `--user` if you don't have root access.)
104 | 2. Upon executing `demo/demo.py`, the following code might be needed in some environments (e.g. when you're working on a remote Linux server):
105 |
106 | ```python
107 | import matplotlib
108 | matplotlib.use('Agg')
109 | ```
110 | 3. If you encounter `library not found: sndfile` error upon executing `demo.py`,
111 | you might have to install it by `apt-get install libsndfile1`.
112 | You can also replace `pysoundfile` with `scipy` or `librosa`, but some modification is needed:
113 | - librosa:
114 | - load(filename, dtype=np.float64)
115 | - output.write_wav(filename, wav, fs)
116 | - remember to pass `dtype` argument to ensure that the method gives you a `double`.
117 | - scipy:
118 | - You'll have to write a customized utility function based on the following methods
119 | - scipy.io.wavfile.read (but this gives you `short`)
120 | - scipy.io.wavfile.write
121 |
122 | 4. If you have installation issue on Windows, I probably could not provide
123 | much help because my development environment is Ubuntu
124 | and Windows Subsystem for Linux ([read this if you are interested in installing it](https://github.com/JeremyCCHsu/wsl)).
125 |
126 |
127 | ### Other Installation Suggestions
128 | 1. Using `pip install .` is safer, and you can easily uninstall pyworld with `pip uninstall pyworld`
129 | - For Mac users: You might need to do `MACOSX_DEPLOYMENT_TARGET=10.9 pip install .` See [issue](https://github.com/SeanNaren/warp-ctc/issues/129#issuecomment-502349652).
130 | 2. Another way to install pyworld is via
131 | `python setup.py install`
132 | - Add `--user` if you don't have root access
133 | - Add `--record install.txt` to track the installation dir
134 | 3. If you just want to try out some experiments, execute
135 | `python setup.py build_ext --inplace`
136 | Then you can use PyWorld from this directory.
137 | You can also copy the resulting **pyworld.so** (pyworld.{arch}.pyd on Windows) file to
138 | `~/.local/lib/python2.7/site-packages` (or corresponding Windows directory)
139 | so that you can use it everywhere like an installed package.
140 | Alternatively you can copy/symlink the compiled files using pip, e.g. `pip install -e .`
141 |
142 |
143 |
144 | ## Acknowledgement
145 | Thanks to all contributors ([tats-u](https://github.com/tats-u), [wuaalb](https://github.com/wuaalb), [r9y9](https://github.com/r9y9), [rikrd](https://github.com/rikrd), [kundan2510](https://github.com/kundan2510)) for making this repo better, and to [sotelo](https://github.com/sotelo), whose [world.py](https://github.com/sotelo/world.py) inspired this repo.
146 |
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | environment:
2 | TWINE_USERNAME: __token__
3 | TWINE_PASSWORD:
4 | secure: A+ks10m1F2uReMyFye6x7fEWxuKCAFTn3Qu+Lw8T/4g3ExgSz9N8LxIEvXC+RkQwjTarXJyCOM8er4OsfYbQzfIfdMxgsuEuSY3TiPj2AssIXuNp7Xps56cRT+AQqXMp4UJ913Ryq1yKvnJHq91c6Lptx7WtWvbixrUI6p+ocV8zb8rO37KHUXR864z9h87dpwiY7ovKhV5+RF6AR+rmdt7nsaPLAd3M65r7M9v/sebaMNjhau5sizD3cwxOlzh/xVWhXABcMd+VlIhtq3xa4g==
5 |
6 | matrix:
7 | - PYTHON_VERSION: "3.6"
8 | PYTHON_ARCH: "64"
9 | MINICONDA: C:\Miniconda3-x64
10 |
11 | - PYTHON_VERSION: "3.6"
12 | PYTHON_ARCH: "32"
13 | MINICONDA: C:\Miniconda3
14 |
15 | - PYTHON_VERSION: "3.7"
16 | PYTHON_ARCH: "64"
17 | MINICONDA: C:\Miniconda3-x64
18 |
19 | - PYTHON_VERSION: "3.7"
20 | PYTHON_ARCH: "32"
21 | MINICONDA: C:\Miniconda3
22 |
23 | - PYTHON_VERSION: "3.8"
24 | PYTHON_ARCH: "64"
25 | MINICONDA: C:\Miniconda3-x64
26 |
27 | # - PYTHON_VERSION: "3.8"
28 | # PYTHON_ARCH: "32"
29 | # MINICONDA: C:\Miniconda3
30 |
31 | - PYTHON_VERSION: "3.9"
32 | PYTHON_ARCH: "64"
33 | MINICONDA: C:\Miniconda3-x64
34 |
35 | # - PYTHON_VERSION: "3.9"
36 | # PYTHON_ARCH: "32"
37 | # MINICONDA: C:\Miniconda3
38 |
39 | - PYTHON_VERSION: "3.10"
40 | PYTHON_ARCH: "64"
41 | MINICONDA: C:\Miniconda3-x64
42 |
43 | # - PYTHON_VERSION: "3.10"
44 | # PYTHON_ARCH: "32"
45 | # MINICONDA: C:\Miniconda3
46 |
47 | - PYTHON_VERSION: "3.11"
48 | PYTHON_ARCH: "64"
49 | MINICONDA: C:\Miniconda3-x64
50 |
51 | # The commented-out entries above (Python >= 3.8, 32-bit) have problems
52 | # executing the `test_script`.
53 |
54 | # platform: x64
55 |
56 | init:
57 | - ps: |
58 | Write-Output "*** Python $env:PYTHON_VERSION ***"
59 | $env:Path += ";$env:PYTHON_ROOT;$env:PYTHON_ROOT\Scripts"
60 |
61 | install:
62 | - ps: |
63 | "INFO: Variables"
64 | "INFO: is a tag: $Env:APPVEYOR_REPO_TAG"
65 | "INFO: $APPVEYOR_REPO_TAG"
66 | "INFO: tag name: $Env:APPVEYOR_REPO_TAG_NAME"
67 | "INFO: $APPVEYOR_REPO_TAG_NAME"
68 | - "SET PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%"
69 | - conda config --set always_yes yes --set changeps1 no
70 | - conda update -q conda
71 | - conda config --add channels pypi
72 | - conda info -a
73 | - "conda create -q -n test-environment python=%PYTHON_VERSION% numpy cython wheel matplotlib"
74 | - activate test-environment
75 | - python -m pip install --no-cache-dir -U pip
76 | - ps: |
77 | Write-Output "Installing requirements..."
78 | pip install -r requirements.txt
79 |
80 | build_script:
81 | - git submodule update --init
82 | - pip install -e .
83 |
84 | test_script:
85 | - ps: |
86 | $ErrorActionPreference = "Stop"
87 | Set-Location "$env:APPVEYOR_BUILD_FOLDER\demo"
88 | python demo.py
89 | Set-Location "$env:APPVEYOR_BUILD_FOLDER"
90 |
91 | after_test:
92 | - pip wheel --wheel-dir=.\dist --no-deps .
93 |
94 | artifacts:
95 | - path: dist\*
96 |
97 | deploy_script:
98 | - ps: |
99 | $ErrorActionPreference = "Stop"
100 | if ($Env:APPVEYOR_REPO_TAG -eq "true"){
101 | pip install twine
102 | pip install -U urllib3==1.26.6
103 | twine upload dist/*.whl
104 | }
105 | else{
106 | "Not a tag (skipped)"
107 | "$APPVEYOR_REPO_TAG"
108 | "$APPVEYOR_REPO_TAG_NAME"
109 | }
110 | # -and $APPVEYOR_REPO_TAG_NAME -match "^v([0-9.a-z]+)$") {
111 |
--------------------------------------------------------------------------------
/demo/demo.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function
2 |
3 | import os
4 | from shutil import rmtree
5 | import argparse
6 |
7 | import numpy as np
8 |
9 | import matplotlib # Remove this line if you don't need them
10 | matplotlib.use('Agg') # Remove this line if you don't need them
11 | import matplotlib.pyplot as plt
12 |
13 | import soundfile as sf
14 | # import librosa
15 | import pyworld as pw
16 |
17 |
# Command-line options for the demo. main() forwards both values to pw.dio();
# frame_period is additionally passed to pw.synthesize().
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--frame_period", type=float, default=5.0)
parser.add_argument("-s", "--speed", type=int, default=1)


# Small offset that savefig() adds to 2-D inputs before plotting, so that
# np.log() is never applied to an exact zero.
EPSILON = 1e-8
24 |
def savefig(filename, figlist, log=True):
    """Plot the arrays in `figlist` as vertically stacked subplots and save them.

    The first array decides the plot type for the whole figure: 1-D arrays are
    drawn as line plots, 2-D arrays as images (transposed, origin at the
    bottom).

    Parameters
    ----------
    filename : str
        Path of the image file to write.
    figlist : list of ndarray
        Arrays to plot, one subplot per entry. Must not be empty.
    log : bool
        Only used for 2-D input. If true, plot ``log(f + EPSILON)``;
        otherwise plot ``f + EPSILON``.
        Default: True

    Raises
    ------
    ValueError
        If `figlist` is empty, or if the first array is neither 1-D nor 2-D.
    """
    n = len(figlist)
    if n == 0:
        raise ValueError('figlist must contain at least one array.')

    # peek into the first instance to pick the plot type
    ndim = len(figlist[0].shape)
    if ndim not in (1, 2):
        raise ValueError('Input dimension must < 3.')

    fig = plt.figure()
    try:
        for i, f in enumerate(figlist):
            plt.subplot(n, 1, i + 1)
            if ndim == 1:
                # Entries that are not 1-D are left as empty subplots.
                if len(f.shape) == 1:
                    plt.plot(f)
                    plt.xlim([0, len(f)])
            else:
                x = np.log(f + EPSILON) if log else f + EPSILON
                plt.imshow(x.T, origin='lower', interpolation='none',
                           aspect='auto',
                           extent=(0, x.shape[0], 0, x.shape[1]))
        plt.savefig(filename)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)
53 |
54 |
def main(args):
    """Run the WORLD analysis/synthesis demo and write the results to ``test/``.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options; `frame_period` and `speed` are read.
    """
    # Always start from a fresh, empty output directory.
    out_dir = 'test'
    if os.path.isdir(out_dir):
        rmtree(out_dir)
    os.mkdir(out_dir)

    wav, fs = sf.read('utterance/vaiueo2d.wav')

    # 1. The convenient way: one call with default options.
    f0, sp, ap = pw.wav2world(wav, fs)
    y = pw.synthesize(f0, sp, ap, fs, pw.default_frame_period)

    # 2. Step by step
    # 2-1 DIO without F0 refinement
    dio_options = dict(
        f0_floor=50.0,
        f0_ceil=600.0,
        channels_in_octave=2,
        frame_period=args.frame_period,
        speed=args.speed,
    )
    raw_f0, t = pw.dio(wav, fs, **dio_options)
    raw_sp = pw.cheaptrick(wav, raw_f0, t, fs)
    raw_ap = pw.d4c(wav, raw_f0, t, fs)
    raw_y = pw.synthesize(raw_f0, raw_sp, raw_ap, fs, args.frame_period)
    sf.write('test/y_without_f0_refinement.wav', raw_y, fs)

    # 2-2 DIO with F0 refinement (using Stonemask)
    f0 = pw.stonemask(wav, raw_f0, t, fs)
    sp = pw.cheaptrick(wav, f0, t, fs)
    ap = pw.d4c(wav, f0, t, fs)
    y = pw.synthesize(f0, sp, ap, fs, args.frame_period)
    sf.write('test/y_with_f0_refinement.wav', y, fs)

    # 2-3 Harvest with F0 refinement (using Stonemask)
    harvest_f0, harvest_t = pw.harvest(wav, fs)
    refined_f0 = pw.stonemask(wav, harvest_f0, harvest_t, fs)
    harvest_sp = pw.cheaptrick(wav, refined_f0, harvest_t, fs)
    harvest_ap = pw.d4c(wav, refined_f0, harvest_t, fs)
    harvest_y = pw.synthesize(refined_f0, harvest_sp, harvest_ap, fs,
                              pw.default_frame_period)
    sf.write('test/y_harvest_with_f0_refinement.wav', harvest_y, fs)

    # Comparison plots: unrefined DIO results next to the refined ones.
    comparisons = (
        ('test/wavform.png', [wav, raw_y, y], True),
        ('test/sp.png', [raw_sp, sp], True),
        ('test/ap.png', [raw_ap, ap], False),
        ('test/f0.png', [raw_f0, f0], True),
    )
    for png_path, arrays, use_log in comparisons:
        savefig(png_path, arrays, log=use_log)

    print('Please check "test" directory for output files')
103 |
104 |
105 | if __name__ == '__main__':
106 | args = parser.parse_args()
107 | main(args)
108 |
--------------------------------------------------------------------------------
/demo/utterance/vaiueo2d.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/9e6cd3e6b2f813d1ffb910d1a0b18ab53f669086/demo/utterance/vaiueo2d.wav
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "setuptools",
4 | "numpy>=2; python_version>='3.9'",
5 | "oldest-supported-numpy; python_version<'3.9'",
6 | "cython",
7 | ]
8 | build-backend = "setuptools.build_meta"
9 |
--------------------------------------------------------------------------------
/pyworld/__init__.py:
--------------------------------------------------------------------------------
"""PyWorld is a Python wrapper for WORLD vocoder.

PyWorld wraps WORLD, which is a free software for high-quality speech
analysis, manipulation and synthesis. It can estimate fundamental frequency (F0),
aperiodicity and spectral envelope and also generate the speech like input speech
with only estimated parameters.

For more information, see https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder
"""

from __future__ import division, print_function, absolute_import

try:
    # Python 3.8+: the standard library can read the installed version, so
    # importing pyworld does not need setuptools to be present at runtime.
    from importlib.metadata import version as _distribution_version
except ImportError:
    # Older interpreters: fall back to setuptools' (deprecated) pkg_resources.
    import pkg_resources

    def _distribution_version(name):
        """Return the installed version string of distribution `name`."""
        return pkg_resources.get_distribution(name).version

__version__ = _distribution_version('pyworld')

from .pyworld import *
18 |
--------------------------------------------------------------------------------
/pyworld/pyworld.pyx:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import cython
3 |
4 | import numpy as np
5 | cimport numpy as np
6 | np.import_array()
7 |
8 |
# Synthesis routine declared from the WORLD header world/synthesis.h.
# `except +` makes Cython translate a C++ exception into a Python exception;
# `nogil` marks the function as callable without holding the GIL.
cdef extern from "world/synthesis.h":
    void Synthesis(const double *f0,
        int f0_length, const double * const *spectrogram,
        const double * const *aperiodicity,
        int fft_size, double frame_period,
        int fs, int y_length, double *y) except + nogil
15 |
16 |
# CheapTrick declarations from the WORLD header world/cheaptrick.h.
cdef extern from "world/cheaptrick.h":
    # Fields of the option struct that this module declares for access.
    ctypedef struct CheapTrickOption:
        double q1
        double f0_floor
        int fft_size

    # The helpers below use `except +` (C++ exceptions become Python
    # exceptions); only CheapTrick itself is additionally declared `nogil`.
    int GetFFTSizeForCheapTrick(int fs, const CheapTrickOption *option) except +
    double GetF0FloorForCheapTrick(int fs, int fft_size) except +
    void InitializeCheapTrickOption(int fs, CheapTrickOption *option) except +
    void CheapTrick(const double *x, int x_length, int fs, const double *temporal_positions,
        const double *f0, int f0_length, const CheapTrickOption *option,
        double **spectrogram) except + nogil
29 |
30 |
# DIO declarations from the WORLD header world/dio.h; used by dio() below.
cdef extern from "world/dio.h":
    # Option fields that dio() overwrites from its keyword arguments.
    ctypedef struct DioOption:
        double f0_floor
        double f0_ceil
        double channels_in_octave
        double frame_period
        int speed
        double allowed_range

    void InitializeDioOption(DioOption *option) except +
    # Declared without `except +`, so no C++ exception translation is
    # generated for this call.
    int GetSamplesForDIO(int fs, int x_length, double frame_period)
    void Dio(const double *x, int x_length, int fs, const DioOption *option,
        double *temporal_positions, double *f0) except + nogil
44 |
45 |
# Harvest declarations from the WORLD header world/harvest.h; used by
# harvest() below.
cdef extern from "world/harvest.h":
    # Option fields that harvest() overwrites from its keyword arguments.
    ctypedef struct HarvestOption:
        double f0_floor
        double f0_ceil
        double frame_period

    # These two are declared without `except +`, so no C++ exception
    # translation is generated for them.
    void InitializeHarvestOption(HarvestOption *option)
    int GetSamplesForHarvest(int fs, int x_length, double frame_period)
    void Harvest(const double *x, int x_length, int fs, const HarvestOption *option,
        double *temporal_positions, double *f0) except + nogil
56 |
57 |
# D4C declarations from the WORLD header world/d4c.h.
cdef extern from "world/d4c.h":
    # Only the `threshold` field of the option struct is declared here.
    ctypedef struct D4COption:
        double threshold

    void InitializeD4COption(D4COption *option) except +
    # Writes its result through the `aperiodicity` pointer-to-pointer argument.
    void D4C(const double *x, int x_length, int fs, const double *temporal_positions,
        const double *f0, int f0_length, int fft_size, const D4COption *option,
        double **aperiodicity) except + nogil
66 |
67 |
# StoneMask declaration from the WORLD header world/stonemask.h.
# It takes an F0 contour (`f0`) and writes the result to `refined_f0`.
cdef extern from "world/stonemask.h":
    void StoneMask(const double *x, int x_length, int fs,
        const double *temporal_positions, const double *f0, int f0_length,
        double *refined_f0) except + nogil
72 |
73 |
# Coding/decoding declarations from the WORLD header world/codec.h.
# None of them is declared `nogil`; all but GetNumberOfAperiodicities use
# `except +` for C++ exception translation.
cdef extern from "world/codec.h":
    int GetNumberOfAperiodicities(int fs)
    void CodeAperiodicity(const double * const *aperiodicity, int f0_length,
        int fs, int fft_size, double **coded_aperiodicity) except +
    void DecodeAperiodicity(const double * const *coded_aperiodicity,
        int f0_length, int fs, int fft_size, double **aperiodicity) except +
    void CodeSpectralEnvelope(const double * const *spectrogram, int f0_length,
        int fs, int fft_size, int number_of_dimensions,
        double **coded_spectral_envelope) except +
    void DecodeSpectralEnvelope(const double * const *coded_spectral_envelope,
        int f0_length, int fs, int fft_size, int number_of_dimensions,
        double **spectrogram) except +
86 |
87 |
# Module-level defaults shared by the F0 extractors dio() and harvest().
default_frame_period = 5.0  # period between consecutive frames, milliseconds
default_f0_floor = 71.0  # lower F0 limit, Hz
default_f0_ceil = 800.0  # upper F0 limit, Hz
91 |
92 |
def dio(np.ndarray[double, ndim=1, mode="c"] x not None, int fs,
        f0_floor=default_f0_floor, f0_ceil=default_f0_ceil,
        channels_in_octave=2.0, frame_period=default_frame_period,
        speed=1, allowed_range=0.1):
    """DIO F0 extraction algorithm.

    Parameters
    ----------
    x : ndarray
        Input waveform signal.
    fs : int
        Sample rate of input signal in Hz.
    f0_floor : float
        Lower F0 limit in Hz.
        Default: 71.0
    f0_ceil : float
        Upper F0 limit in Hz.
        Default: 800.0
    channels_in_octave : float
        Resolution of multiband processing; normally shouldn't be changed.
        Default: 2.0
    frame_period : float
        Period between consecutive frames in milliseconds.
        Default: 5.0
    speed : int
        The F0 estimator may downsample the input signal using this integer factor
        (range [1;12]). The algorithm will then operate on a signal at fs/speed Hz
        to reduce computational complexity, but high values may negatively impact
        accuracy.
        Default: 1 (no downsampling)
    allowed_range : float
        Threshold for voiced/unvoiced decision. Can be any value >= 0, but 0.02 to 0.2
        is a reasonable range. Lower values will cause more frames to be considered
        unvoiced (in the extreme case of `allowed_range=0`, almost all frames will be
        unvoiced).
        Default: 0.1

    Returns
    -------
    f0 : ndarray
        Estimated F0 contour.
    temporal_positions : ndarray
        Temporal position of each frame.

    Raises
    ------
    ValueError
        If `x` is empty.
    """
    cdef int x_length = len(x)
    cdef DioOption option
    if x_length == 0:
        # &x[0] is taken below with bounds checking disabled, so an empty
        # buffer must be rejected here instead of being handed to the C++ code.
        raise ValueError('x must contain at least one sample.')
    InitializeDioOption(&option)
    option.channels_in_octave = channels_in_octave
    option.f0_floor = f0_floor
    option.f0_ceil = f0_ceil
    option.frame_period = frame_period
    option.speed = speed
    option.allowed_range = allowed_range
    # Number of frames; both output buffers are allocated with this length.
    cdef int f0_length = GetSamplesForDIO(fs, x_length, option.frame_period)
    cdef np.ndarray[double, ndim=1, mode="c"] f0 = \
        np.zeros(f0_length, dtype=np.dtype('float64'))
    cdef np.ndarray[double, ndim=1, mode="c"] temporal_positions = \
        np.zeros(f0_length, dtype=np.dtype('float64'))
    # The GIL is released for the duration of the C++ call.
    with (nogil, cython.boundscheck(False)):
        Dio(&x[0], x_length, fs, &option, &temporal_positions[0], &f0[0])
    return f0, temporal_positions
153 |
154 |
155 | def harvest(np.ndarray[double, ndim=1, mode="c"] x not None, int fs,
156 | f0_floor=default_f0_floor, f0_ceil=default_f0_ceil,
157 | frame_period=default_frame_period):
158 | """Estimate an F0 contour with the Harvest F0 extraction algorithm.
159 |
160 | Parameters
161 | ----------
162 | x : ndarray
163 | Input waveform signal (1-D, C-contiguous float64 array; must not be None).
164 | fs : int
165 | Sample rate of input signal in Hz.
166 | f0_floor : float
167 | Lower F0 limit in Hz.
168 | Default: 71.0
169 | f0_ceil : float
170 | Upper F0 limit in Hz.
171 | Default: 800.0
172 | frame_period : float
173 | Period between consecutive frames in milliseconds.
174 | Default: 5.0
175 |
176 | Returns
177 | -------
178 | f0 : ndarray
179 | Estimated F0 contour (float64, one entry per frame).
180 | temporal_positions : ndarray
181 | Temporal position of each frame (float64, same length as `f0`).
182 | """
183 | cdef int x_length = len(x)
184 | cdef HarvestOption option
185 | InitializeHarvestOption(&option)  # presumably fills in WORLD's defaults; the fields below are then overridden
186 | option.f0_floor = f0_floor
187 | option.f0_ceil = f0_ceil
188 | option.frame_period = frame_period
189 | f0_length = GetSamplesForHarvest(fs, x_length, option.frame_period)  # sizes both output arrays
190 | cdef np.ndarray[double, ndim=1, mode="c"] f0 = \
191 | np.zeros(f0_length, dtype=np.dtype('float64'))
192 | cdef np.ndarray[double, ndim=1, mode="c"] temporal_positions = \
193 | np.zeros(f0_length, dtype=np.dtype('float64'))
194 | with (nogil, cython.boundscheck(False)):  # release the GIL around the C call
195 | Harvest(&x[0], x_length, fs, &option, &temporal_positions[0], &f0[0])  # output buffers are passed by pointer
196 | return f0, temporal_positions
197 |
198 |
199 | def stonemask(np.ndarray[double, ndim=1, mode="c"] x not None,
200 | np.ndarray[double, ndim=1, mode="c"] f0 not None,
201 | np.ndarray[double, ndim=1, mode="c"] temporal_positions not None,
202 | int fs):
203 | """Refine an existing F0 contour with the StoneMask algorithm.
204 |
205 | Parameters
206 | ----------
207 | x : ndarray
208 | Input waveform signal (1-D, C-contiguous float64 array).
209 | f0 : ndarray
210 | Input F0 contour to be refined (1-D, C-contiguous float64 array).
211 | temporal_positions : ndarray
212 | Temporal positions of each frame (1-D, C-contiguous float64 array).
213 | fs : int
214 | Sample rate of input signal in Hz.
215 |
216 | Returns
217 | -------
218 | refined_f0 : ndarray
219 | Refined F0 contour (float64, same length as `f0`).
220 | """
221 | cdef int x_length = len(x)
222 | cdef int f0_length = len(f0)  # NOTE(review): len(temporal_positions) is not checked against this
223 | cdef np.ndarray[double, ndim=1, mode="c"] refined_f0 = \
224 | np.zeros(f0_length, dtype=np.dtype('float64'))
225 | with (nogil, cython.boundscheck(False)):  # release the GIL around the C call
226 | StoneMask(&x[0], x_length, fs, &temporal_positions[0],
227 | &f0[0], f0_length, &refined_f0[0])
228 | return refined_f0
229 |
230 |
231 | def get_cheaptrick_fft_size(int fs, f0_floor=default_f0_floor):
232 | """Calculate a suitable FFT size for CheapTrick given the F0 floor.
233 |
234 | Parameters
235 | ----------
236 | fs : int
237 | Sample rate of input signal in Hz.
238 | f0_floor : float
239 | Lower F0 limit in Hz. The required FFT size is a direct
240 | consequence of the F0 floor used.
241 | Default: 71.0
242 |
243 | Returns
244 | -------
245 | fft_size : int
246 | Resulting FFT size, as returned by GetFFTSizeForCheapTrick().
247 | """
248 | cdef CheapTrickOption option  # only f0_floor is assigned below; the other fields stay unset
249 | option.f0_floor = f0_floor
250 | cdef int fft_size = GetFFTSizeForCheapTrick(fs, &option)
251 | return fft_size
252 |
253 | def get_cheaptrick_f0_floor(int fs, int fft_size):
254 | """Calculate the actual lower F0 limit for CheapTrick
255 | based on the sampling frequency and FFT size used. Whenever F0 is below
256 | this threshold the spectrum will be analyzed as if the frame is unvoiced
257 | (using kDefaultF0 defined in constantnumbers.h).
258 |
259 | Parameters
260 | ----------
261 | fs : int
262 | Sample rate of input signal in Hz.
263 | fft_size : int
264 | FFT size used for CheapTrick.
265 |
266 | Returns
267 | -------
268 | f0_floor : float
269 | Resulting lower F0 limit in Hz, as returned by GetF0FloorForCheapTrick().
270 | """
271 | cdef double f0_floor = GetF0FloorForCheapTrick(fs, fft_size)
272 | return f0_floor
273 |
274 | def cheaptrick(np.ndarray[double, ndim=1, mode="c"] x not None,
275 | np.ndarray[double, ndim=1, mode="c"] f0 not None,
276 | np.ndarray[double, ndim=1, mode="c"] temporal_positions not None,
277 | int fs,
278 | q1=-0.15, f0_floor=default_f0_floor, fft_size=None):
279 | """Estimate the harmonic spectral envelope with the CheapTrick algorithm.
280 |
281 | Parameters
282 | ----------
283 | x : ndarray
284 | Input waveform signal (1-D, C-contiguous float64 array).
285 | f0 : ndarray
286 | Input F0 contour (1-D, C-contiguous float64 array).
287 | temporal_positions : ndarray
288 | Temporal positions of each frame (1-D, C-contiguous float64 array).
289 | fs : int
290 | Sample rate of input signal in Hz.
291 | q1 : float
292 | Spectral recovery parameter.
293 | Default: -0.15 (this value was tuned and normally does not need adjustment)
294 | f0_floor : float, None
295 | Lower F0 limit in Hz. Not used in case `fft_size` is specified.
296 | Default: 71.0
297 | fft_size : int, None
298 | FFT size to be used. When `None` (default) is used, the FFT size is computed
299 | automatically as a function of the given input sample rate and F0 floor.
300 | When `fft_size` is specified, the given `f0_floor` parameter is ignored.
301 | Default: None
302 |
303 | Returns
304 | -------
305 | spectrogram : ndarray
306 | Spectral envelope (squared magnitude), shape `(len(f0), fft_size // 2 + 1)`.
307 | """
308 | cdef CheapTrickOption option
309 | InitializeCheapTrickOption(fs, &option)
310 | option.q1 = q1
311 | if fft_size is None:
312 | option.f0_floor = f0_floor # CheapTrickOption.f0_floor is only used in GetFFTSizeForCheapTrick()
313 | option.fft_size = GetFFTSizeForCheapTrick(fs, &option)
314 | else:
315 | option.fft_size = fft_size
316 | # the f0_floor used by CheapTrick() will be re-computed from this given fft_size
317 | cdef int x_length = len(x)
318 | cdef int f0_length = len(f0)  # NOTE(review): len(temporal_positions) is not checked against this
319 |
320 | cdef double[:, ::1] spectrogram = np.zeros((f0_length, option.fft_size//2 + 1),
321 | dtype=np.dtype('float64'))
322 | cdef np.intp_t[:] tmp = np.zeros(f0_length, dtype=np.intp)  # scratch storage for one row pointer per frame
323 | cdef double **cpp_spectrogram = ( &tmp[0])  # NOTE(review): pointer casts look missing on this line - confirm against the original source
324 | cdef np.intp_t i
325 | with (nogil, cython.boundscheck(False)):  # release the GIL around the C call
326 | for i in range(f0_length):
327 | cpp_spectrogram[i] = &spectrogram[i, 0]  # row i of the output, handed to C as double*
328 |
329 | CheapTrick(&x[0], x_length, fs, &temporal_positions[0],
330 | &f0[0], f0_length, &option, cpp_spectrogram)
331 | return np.array(spectrogram, dtype=np.float64)  # copy the memoryview into a regular ndarray
332 |
333 |
334 | def d4c(np.ndarray[double, ndim=1, mode="c"] x not None,
335 | np.ndarray[double, ndim=1, mode="c"] f0 not None,
336 | np.ndarray[double, ndim=1, mode="c"] temporal_positions not None,
337 | int fs,
338 | threshold=0.85, fft_size=None):
339 | """Estimate aperiodicity with the D4C algorithm.
340 |
341 | Parameters
342 | ----------
343 | x : ndarray
344 | Input waveform signal (1-D, C-contiguous float64 array).
345 | f0 : ndarray
346 | Input F0 contour (1-D, C-contiguous float64 array).
347 | temporal_positions : ndarray
348 | Temporal positions of each frame (1-D, C-contiguous float64 array).
349 | fs : int
350 | Sample rate of input signal in Hz.
354 | threshold : float
355 | Threshold for aperiodicity-based voiced/unvoiced decision, in range 0 to 1.
356 | If a value of 0 is used, voiced frames will be kept voiced. If a value > 0 is
357 | used some voiced frames can be considered unvoiced by setting their aperiodicity
358 | to 1 (thus synthesizing them with white noise). Using `threshold=0` will result
359 | in the behavior of older versions of D4C. The current default of 0.85 is meant
360 | to be used in combination with the Harvest F0 estimator, which was designed to have
361 | a high voiced/unvoiced threshold (i.e. most frames will be considered voiced).
362 | Default: 0.85
363 | fft_size : int, None
364 | FFT size to be used. When `None` (default) is used, the FFT size is computed
365 | automatically as a function of the given input sample rate and the default F0 floor.
366 | When `fft_size` is specified, it should match the FFT size used to compute
367 | the spectral envelope (i.e. `fft_size=2*(sp.shape[1] - 1)`) in order to get the
368 | desired results when resynthesizing.
369 | Default: None
370 |
371 | Returns
372 | -------
373 | aperiodicity : ndarray
374 | Aperiodicity (envelope, linear magnitude relative to spectral envelope), shape `(len(f0), fft_size // 2 + 1)`.
375 | """
376 | cdef int x_length = len(x)
377 | cdef int f0_length = len(f0)  # NOTE(review): len(temporal_positions) is not checked against this
378 | cdef int fft_size0
379 | if fft_size is None:
380 | fft_size0 = get_cheaptrick_fft_size(fs, default_f0_floor)  # same sizing rule CheapTrick uses for the default F0 floor
381 | else:
382 | fft_size0 = fft_size
383 |
384 | cdef D4COption option
385 | InitializeD4COption(&option)
386 | option.threshold = threshold
387 |
388 | cdef double[:, ::1] aperiodicity = np.zeros((f0_length, fft_size0//2 + 1),
389 | dtype=np.dtype('float64'))
390 | cdef np.intp_t[:] tmp = np.zeros(f0_length, dtype=np.intp)  # scratch storage for one row pointer per frame
391 | cdef double **cpp_aperiodicity = ( &tmp[0])  # NOTE(review): pointer casts look missing on this line - confirm against the original source
392 | cdef np.intp_t i
393 | with (nogil, cython.boundscheck(False)):  # release the GIL around the C call
394 | for i in range(f0_length):
395 | cpp_aperiodicity[i] = &aperiodicity[i, 0]  # row i of the output, handed to C as double*
396 |
397 | D4C(&x[0], x_length, fs, &temporal_positions[0],
398 | &f0[0], f0_length, fft_size0, &option,
399 | cpp_aperiodicity)
400 | return np.array(aperiodicity, dtype=np.float64)  # copy the memoryview into a regular ndarray
401 |
402 |
403 | def synthesize(np.ndarray[double, ndim=1, mode="c"] f0 not None,
404 | np.ndarray[double, ndim=2, mode="c"] spectrogram not None,
405 | np.ndarray[double, ndim=2, mode="c"] aperiodicity not None,
406 | int fs,
407 | double frame_period=default_frame_period):
408 | """Synthesize a waveform from the WORLD parametric representation.
409 |
410 | Parameters
411 | ----------
412 | f0 : ndarray
413 | Input F0 contour (1-D, C-contiguous float64 array).
414 | spectrogram : ndarray
415 | Spectral envelope (2-D, one row per F0 frame).
416 | aperiodicity : ndarray
417 | Aperiodicity envelope (2-D, same shape as `spectrogram`).
418 | fs : int
419 | Sample rate of input signal in Hz.
420 | frame_period : float
421 | Period between consecutive frames in milliseconds.
422 | Default: 5.0
423 |
424 | Returns
425 | -------
426 | y : ndarray
427 | Output waveform signal. Raises ValueError if the input shapes are inconsistent.
428 | """
429 | if (f0.shape[0] != spectrogram.shape[0] or  # all three inputs must have the same number of frames
430 | f0.shape[0] != aperiodicity.shape[0]):
431 | raise ValueError('Mismatched number of frames between F0 ({:d}), '
432 | 'spectrogram ({:d}) and aperiodicty ({:d})'
433 | .format(f0.shape[0], spectrogram.shape[0],
434 | aperiodicity.shape[0]))
435 | if spectrogram.shape[1] != aperiodicity.shape[1]:  # both envelopes must share the same number of frequency bins
436 | raise ValueError('Mismatched dimensionality (spec size) between '
437 | 'spectrogram ({:d}) and aperiodicity ({:d})'
438 | .format(spectrogram.shape[1], aperiodicity.shape[1]))
439 |
440 | cdef int f0_length = len(f0)
441 | cdef int y_length = (f0_length * frame_period * fs / 1000)  # output length in samples: frames * period [ms] * fs / 1000
442 | cdef int fft_size = (spectrogram.shape[1] - 1)*2  # inverse of the `fft_size // 2 + 1` column count
443 | cdef np.ndarray[double, ndim=1, mode="c"] y = \
444 | np.zeros(y_length, dtype=np.dtype('float64'))
445 |
446 | cdef double[:, ::1] spectrogram0 = spectrogram
447 | cdef double[:, ::1] aperiodicity0 = aperiodicity
448 | cdef np.intp_t[:] tmp = np.zeros(f0_length, dtype=np.intp)  # scratch storage for spectrogram row pointers
449 | cdef np.intp_t[:] tmp2 = np.zeros(f0_length, dtype=np.intp)  # scratch storage for aperiodicity row pointers
450 | cdef double **cpp_spectrogram = ( &tmp[0])  # NOTE(review): pointer casts look missing on this line - confirm against the original source
451 | cdef double **cpp_aperiodicity = ( &tmp2[0])
452 | cdef np.intp_t i
453 | with (nogil, cython.boundscheck(False)):  # release the GIL around the C call
454 | for i in range(f0_length):
455 | cpp_spectrogram[i] = &spectrogram0[i, 0]
456 | cpp_aperiodicity[i] = &aperiodicity0[i, 0]
457 |
458 | Synthesis(&f0[0], f0_length, cpp_spectrogram,
459 | cpp_aperiodicity, fft_size, frame_period, fs, y_length, &y[0])
460 | return y
461 |
462 |
463 | def get_num_aperiodicities(fs):
464 | """Calculate the required dimensionality to code D4C aperiodicity.
465 |
466 | Parameters
467 | ----------
468 | fs : int
469 | Sample rate of input signal in Hz.
470 |
471 | Returns
472 | -------
473 | n_aper : int
474 | Required number of coefficients, as returned by GetNumberOfAperiodicities().
475 | """
476 | cdef int n_aper = GetNumberOfAperiodicities(fs)
477 | return n_aper
478 |
479 | def code_aperiodicity(np.ndarray[double, ndim=2, mode="c"] aperiodicity, fs):
480 | """Reduce dimensionality of D4C aperiodicity.
481 |
482 | Parameters
483 | ----------
484 | aperiodicity : ndarray
485 | Aperiodicity envelope (2-D, C-contiguous float64 array, one row per frame).
486 | fs : int
487 | Sample rate of input signal in Hz.
488 |
489 | Returns
490 | -------
491 | coded_aperiodicity : ndarray
492 | Coded aperiodicity envelope, shape `(len(aperiodicity), get_num_aperiodicities(fs))`.
493 | """
494 | cdef int ap_length = len(aperiodicity)
495 | cdef int fft_size = (aperiodicity.shape[1] - 1)*2  # inverse of the `fft_size // 2 + 1` column count
496 | cdef int n_coded_aper = get_num_aperiodicities(fs)
497 |
498 | cdef double[:, ::1] aper = aperiodicity
499 | cdef double[:, ::1] coded_aper = np.zeros((ap_length, n_coded_aper),
500 | dtype=np.dtype('float64'))
501 | cdef np.intp_t[:] tmp1 = np.zeros(ap_length, dtype=np.intp)  # scratch storage for input row pointers
502 | cdef np.intp_t[:] tmp2 = np.zeros(ap_length, dtype=np.intp)  # scratch storage for output row pointers
503 | cdef double **cpp_aper = ( &tmp1[0])  # NOTE(review): pointer casts look missing on this line - confirm against the original source
504 | cdef double **cpp_coded_aper = ( &tmp2[0])
505 | cdef np.intp_t i
506 | for i in range(ap_length):
507 | cpp_aper[i] = &aper[i, 0]
508 | cpp_coded_aper[i] = &coded_aper[i, 0]
509 |
510 | CodeAperiodicity(cpp_aper, ap_length, fs,
511 | fft_size, cpp_coded_aper)
512 |
513 | return np.array(coded_aper, dtype=np.float64)  # copy the memoryview into a regular ndarray
514 |
515 | def decode_aperiodicity(np.ndarray[double, ndim=2, mode="c"] coded_aperiodicity,
516 | fs, fft_size):
517 | """Restore full dimensionality of coded D4C aperiodicity.
518 |
519 | Parameters
520 | ----------
521 | coded_aperiodicity : ndarray
522 | Coded aperiodicity envelope; must have `get_num_aperiodicities(fs)` columns.
523 | fs : int
524 | Sample rate of input signal in Hz.
525 | fft_size : int
526 | FFT size corresponding to the full dimensional aperiodicity.
527 |
528 | Returns
529 | -------
530 | aperiodicity : ndarray
531 | Aperiodicity envelope, shape `(len(coded_aperiodicity), fft_size // 2 + 1)`.
532 | """
533 | cdef int ap_length = len(coded_aperiodicity)
534 | cdef int n_coded_aper = get_num_aperiodicities(fs)
535 | if n_coded_aper != coded_aperiodicity.shape[1]:  # reject input coded for a different dimensionality
536 | raise ValueError('Invalid aperiodicity code dimensionality '
537 | '(was: {:d}, expected: {:d})'
538 | .format(coded_aperiodicity.shape[1], n_coded_aper))
539 |
540 | cdef double[:, ::1] coded_aper = coded_aperiodicity
541 | cdef double[:, ::1] aper = np.zeros((ap_length, fft_size//2 + 1),
542 | dtype=np.dtype('float64'))
543 | cdef np.intp_t[:] tmp1 = np.zeros(ap_length, dtype=np.intp)  # scratch storage for input row pointers
544 | cdef np.intp_t[:] tmp2 = np.zeros(ap_length, dtype=np.intp)  # scratch storage for output row pointers
545 | cdef double **cpp_coded_aper = ( &tmp1[0])  # NOTE(review): pointer casts look missing on this line - confirm against the original source
546 | cdef double **cpp_aper = ( &tmp2[0])
547 | cdef np.intp_t i
548 | for i in range(ap_length):
549 | cpp_coded_aper[i] = &coded_aper[i, 0]
550 | cpp_aper[i] = &aper[i, 0]
551 |
552 | DecodeAperiodicity(cpp_coded_aper, ap_length, fs, fft_size, cpp_aper)
553 |
554 | return np.array(aper, dtype=np.float64)  # copy the memoryview into a regular ndarray
555 |
556 | def code_spectral_envelope(np.ndarray[double, ndim=2, mode="c"] spectrogram, fs,
557 | number_of_dimensions):
558 | """Reduce dimensionality of spectral envelope.
559 |
560 | Parameters
561 | ----------
562 | spectrogram : ndarray
563 | Spectral envelope (2-D, C-contiguous float64 array, one row per frame).
564 | fs : int
565 | Sample rate of input signal in Hz.
566 | number_of_dimensions : int
567 | Number of dimensions of the coded spectral envelope.
568 |
569 | Returns
570 | -------
571 | coded_spectral_envelope : ndarray
572 | Coded spectral envelope, shape `(len(spectrogram), number_of_dimensions)`.
573 | """
574 | cdef int sp_length = len(spectrogram)
575 | cdef int fft_size = (spectrogram.shape[1] - 1)*2  # inverse of the `fft_size // 2 + 1` column count
576 |
577 | cdef double[:, ::1] sp = spectrogram
578 | cdef double[:, ::1] coded_sp = np.zeros((sp_length, number_of_dimensions),
579 | dtype=np.dtype('float64'))
580 | cdef np.intp_t[:] tmp1 = np.zeros(sp_length, dtype=np.intp)  # scratch storage for input row pointers
581 | cdef np.intp_t[:] tmp2 = np.zeros(sp_length, dtype=np.intp)  # scratch storage for output row pointers
582 | cdef double **cpp_sp = ( &tmp1[0])  # NOTE(review): pointer casts look missing on this line - confirm against the original source
583 | cdef double **cpp_coded_sp = ( &tmp2[0])
584 | cdef np.intp_t i
585 | for i in range(sp_length):
586 | cpp_sp[i] = &sp[i, 0]
587 | cpp_coded_sp[i] = &coded_sp[i, 0]
588 |
589 | CodeSpectralEnvelope(cpp_sp, sp_length, fs, fft_size,
590 | number_of_dimensions, cpp_coded_sp)
591 |
592 | return np.array(coded_sp, dtype=np.float64)  # copy the memoryview into a regular ndarray
593 |
594 | def decode_spectral_envelope(np.ndarray[double, ndim=2, mode="c"] coded_spectral_envelope,
595 | fs, fft_size):
596 | """Restore full dimensionality of coded spectral envelope.
597 |
598 | Parameters
599 | ----------
600 | coded_spectral_envelope : ndarray
601 | Coded spectral envelope (2-D, C-contiguous float64 array, one row per frame).
602 | fs : int
603 | Sample rate of input signal in Hz.
604 | fft_size : int
605 | FFT size corresponding to the full dimensional spectral envelope.
606 |
607 | Returns
608 | -------
609 | spectrogram : ndarray
610 | Spectral envelope, shape `(len(coded_spectral_envelope), fft_size // 2 + 1)`.
611 | """
612 | cdef int sp_length = len(coded_spectral_envelope)
613 | cdef int number_of_dimensions = len(coded_spectral_envelope[0])  # NOTE(review): indexing row 0 needs at least one frame; `.shape[1]` would not - confirm
614 | cdef double[:, ::1] coded_sp = coded_spectral_envelope
615 | cdef double[:, ::1] sp = np.zeros((sp_length, fft_size//2 + 1),
616 | dtype=np.dtype('float64'))
617 | cdef np.intp_t[:] tmp1 = np.zeros(sp_length, dtype=np.intp)  # scratch storage for input row pointers
618 | cdef np.intp_t[:] tmp2 = np.zeros(sp_length, dtype=np.intp)  # scratch storage for output row pointers
619 | cdef double **cpp_coded_sp = ( &tmp1[0])  # NOTE(review): pointer casts look missing on this line - confirm against the original source
620 | cdef double **cpp_sp = ( &tmp2[0])
621 | cdef np.intp_t i
622 | for i in range(sp_length):
623 | cpp_coded_sp[i] = &coded_sp[i, 0]
624 | cpp_sp[i] = &sp[i, 0]
625 |
626 | DecodeSpectralEnvelope(cpp_coded_sp, sp_length, fs, fft_size,
627 | number_of_dimensions, cpp_sp)
628 |
629 | return np.array(sp, dtype=np.float64)  # copy the memoryview into a regular ndarray
630 |
631 | def wav2world(x, fs, fft_size=None, frame_period=default_frame_period):
632 | """Convenience function to do all WORLD analysis steps in a single call.
633 |
634 | In this case only `fft_size` and `frame_period` can be configured and other
635 | parameters are fixed to their defaults. Likewise, F0 estimation is fixed to
636 | DIO plus StoneMask refinement.
637 |
638 | Parameters
639 | ----------
640 | x : ndarray
641 | Input waveform signal.
642 | fs : int
643 | Sample rate of input signal in Hz.
644 | fft_size : int, None
645 | Length of Fast Fourier Transform (in number of samples), forwarded to `cheaptrick` and `d4c`.
646 | The resulting dimension of `ap` and `sp` will be `fft_size // 2 + 1`. Default: None
647 | frame_period : float
648 | Period between consecutive frames in milliseconds.
649 | Default: 5.0
650 |
651 | Returns
652 | -------
653 | f0 : ndarray
654 | F0 contour (after StoneMask refinement).
655 | sp : ndarray
656 | Spectral envelope.
657 | ap : ndarray
658 | Aperiodicity.
659 | """
660 | _f0, t = dio(x, fs, frame_period=frame_period)  # coarse F0 estimate plus frame times
661 | f0 = stonemask(x, _f0, t, fs)  # refine the DIO estimate
662 | sp = cheaptrick(x, f0, t, fs, fft_size=fft_size)
663 | ap = d4c(x, f0, t, fs, fft_size=fft_size)  # same fft_size so `sp` and `ap` get matching widths
664 | return f0, sp, ap
665 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | matplotlib
3 | argparse; python_version<"3.5"
4 | soundfile
5 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, print_function, with_statement
2 |
3 | import sys
4 | from glob import glob
5 | from os.path import join
6 |
7 | import numpy
8 | from setuptools import Extension, find_packages, setup
9 | from setuptools.command.build_ext import build_ext
10 |
11 |
12 | _VERSION = '0.3.4'
13 |
14 |
15 | world_src_top = join("lib", "World", "src")
16 | world_sources = glob(join(world_src_top, "*.cpp"))
17 |
18 | ext_modules = [
19 | Extension(
20 | name="pyworld.pyworld",
21 | include_dirs=[world_src_top, numpy.get_include()],
22 | sources=[join("pyworld", "pyworld.pyx")] + world_sources,
23 | language="c++")]
24 |
25 | kwargs = {"encoding": "utf-8"} if int(sys.version[0]) > 2 else {}
26 | setup(
27 | name="pyworld",
28 | description="PyWorld: a Python wrapper for WORLD vocoder",
29 | long_description=open("README.md", "r", **kwargs).read(),
30 | long_description_content_type="text/markdown",
31 | ext_modules=ext_modules,
32 | cmdclass={'build_ext': build_ext},
33 | version=_VERSION,
34 | packages=find_packages(),
35 | install_requires=['numpy'],
36 | extras_require={
37 | 'test': ['nose'],
38 | 'sdist': ['numpy', 'cython>=0.24'],
39 | },
40 | author="Pyworld Contributors",
41 | author_email="jeremycchsu@gmail.com",
42 | url="https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder",
43 | keywords=['vocoder'],
44 | classifiers=[],
45 | )
46 |
--------------------------------------------------------------------------------