├── .github └── workflows │ └── check-toolkit.yaml ├── .gitignore ├── INSTALL.rst ├── LICENSE.txt ├── README.rst ├── doc ├── Makefile ├── README.md ├── conf.py ├── index.rst ├── make.bat └── reference.rst ├── img ├── analysis_synthesis.png ├── global_spectrum.png ├── prosody_labeller.png └── screenshot.png ├── pyproject.toml ├── readthedocs.yml ├── samples ├── 01l_fact_0001.lab ├── 01l_fact_0001.wav ├── 14m_proosa_0002_0002.lab ├── 14m_proosa_0002_0002.wav ├── 40_N1_C_kissankello.TextGrid ├── 40_N1_C_kissankello.wav ├── 8hz_4hz_1hz.wav ├── kan_0001.F0 ├── kan_0001.lab ├── kan_0001.wav ├── libritts │ ├── 7127_75947_000010_000000.TextGrid │ ├── 7127_75947_000010_000000.wav │ ├── LJ050-0276.TextGrid │ ├── LJ050-0276.wav │ ├── LJ050-0277.TextGrid │ ├── LJ050-0277.wav │ ├── LJ050-0278.TextGrid │ └── LJ050-0278.wav ├── rjs_01_0003.F0 ├── rjs_01_0003.lab └── rjs_01_0003.wav ├── screenshot.png ├── test ├── diff_num.py ├── resources │ ├── 01l_fact_0001.cwt │ ├── libritts │ │ ├── 7127_75947_000010_000000.prom │ │ ├── LJ050-0276.prom │ │ ├── LJ050-0277.prom │ │ └── LJ050-0278.prom │ └── test_spectrum │ │ ├── 8hz_4hz_1hz.freqs.txt │ │ └── 8hz_4hz_1hz.spec.txt └── run_test.sh ├── tools.rst └── wavelet_prosody_toolkit ├── __init__.py ├── configs ├── default.yaml ├── libritts.yaml ├── libritts_boundary.yaml └── synthesis.yaml ├── cwt_analysis_synthesis.py ├── cwt_global_spectrum.py ├── prosody_labeller.py ├── prosody_tools ├── __init__.py ├── cwt_utils.py ├── duration_processing.py ├── energy_processing.py ├── f0_processing.py ├── filter.py ├── lab.py ├── loma.py ├── misc.py ├── pitch_tracker.py └── smooth_and_interp.py └── wavelet_gui.py /.github/workflows/check-toolkit.yaml: -------------------------------------------------------------------------------- 1 | name: check-wavelet-prosody-toolkit 2 | run-name: ${{ github.actor }} is in validation 3 | on: [push] 4 | jobs: 5 | build: 6 | runs-on: ubuntu-latest 7 | strategy: 8 | matrix: 9 | python-version: [ '3.8', '3.9', '3.10', 
'3.11' ] 10 | 11 | steps: 12 | - uses: actions/checkout@v3 13 | 14 | # Setup python 15 | - name: Setup python 16 | uses: actions/setup-python@v1 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | architecture: x64 20 | 21 | # Install everything 22 | - name: Install wavelet-prosody-toolkit 23 | run: pip install -e . 24 | 25 | # Linux and macOS 26 | - name: Run the test 27 | shell: bash -l {0} 28 | run: | 29 | bash test/run_test.sh 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python ### 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | doc/_build/ 66 | doc/_modules 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule.* 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # Environments 84 | .env 85 | .venv 86 | env/ 87 | venv/ 88 | ENV/ 89 | env.bak/ 90 | venv.bak/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | 105 | 106 | # wavelet specificities 107 | *.prom 108 | *.wav.* 109 | *.wav_*.* -------------------------------------------------------------------------------- /INSTALL.rst: -------------------------------------------------------------------------------- 1 | Install procedure 2 | ================= 3 | 4 | Wavelet Prosody Analyzer is a toolkit comprising command line tools and a GUI application. 5 | All the tools are started from terminal, so some familiarity with command line tools is assumed. 6 | 7 | Installation has been tested only on one Ubuntu Linux, on Arch Linux and on MacOS Sierra machine. 8 | Running on windows might be possible if the required libraries can be installed. 9 | 10 | Default installation 11 | --------------------- 12 | 13 | To install the toolkit, simply run 14 | 15 | .. 
code:: sh 16 | 17 | pip install -e .[gui] 18 | 19 | It will install the dependencies needed to run the toolkit. 20 | 21 | To be able to run the application globally, the following line should be added to your shell profile file (~/.bashrc or ~/.profile in general): 22 | 23 | .. code:: sh 24 | 25 | export PATH=~/.local/bin:$PATH 26 | 27 | After restarting the shell, you can finally run the tool by calling them on the command line, like for example: 28 | 29 | .. code:: sh 30 | 31 | wavelet_gui 32 | 33 | Development mode installation 34 | ------------------------ 35 | 36 | Even if the setup doesn't require it, we advise to use the environment management system conda ( https://docs.conda.io/en/latest/miniconda.html ). 37 | Conda provides an easy way to define the environments and install precompiled packages. 38 | Therefore, the modification you will propose won't affect your system configuration. 39 | 40 | Assuming you have created activated the conda environment, you can install pre-compiled packages 41 | 42 | .. code:: sh 43 | 44 | conda install scipy numpy matplotlib joblib pyqt 45 | 46 | We then use the setup script to install the rest of the dependencies: 47 | 48 | .. code:: sh 49 | 50 | pip install -e .[full] 51 | 52 | To start the Wavelet Prosody Analyzer GUI, run the following commands: 53 | 54 | .. code:: sh 55 | 56 | wavelet_gui 57 | 58 | if it doesn’t work, please raise an issue on github here: https://github.com/asuni/wavelet_prosody_toolkit/issues . 
59 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Antti Suni 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | |github-actions-badge| 2 | 3 | .. |github-actions-badge| image:: https://github.com/asuni/wavelet_prosody_toolkit/actions/workflows/check-toolkit.yaml/badge.svg 4 | .. 
_github-actions-badge: https://github.com/asuni/wavelet_prosody_toolkit/actions?query=check-wavelet-prosody-toolkit 5 | 6 | Wavelet prosody analyzer 7 | ======================== 8 | 9 | antti.suni@helsinki.fi 10 | 11 | **UPDATE 3.2.2020**, Additional command-line tools: **batch-processing, global spectrum and analysis-synthesis:** `tools.rst `__. 12 | 13 | |screenshot| 14 | 15 | .. |screenshot| image:: screenshot.png 16 | 17 | Description 18 | ----------- 19 | 20 | The program calculates f0, energy and duration features from speech 21 | wav-file, performs continuous wavelet analysis on combined features, 22 | finds prosodic events (prominences, boundaries) from the wavelet 23 | scalogram and aligns the events with transcribed units. 24 | 25 | See also: 26 | 27 | [1] Antti Suni, Juraj Šimko, Daniel Aalto, Martti Vainio, Hierarchical 28 | representation and estimation of prosody using continuous wavelet 29 | transform, Computer Speech & Language, Volume 45, 2017, Pages 123-136, 30 | ISSN 0885-2308, https://doi.org/10.1016/j.csl.2016.11.001. 31 | 32 | The default settings of the program are roughly the same as in the 33 | paper, duration signal was generated from word level labels. 34 | 35 | Requirements 36 | ------------ 37 | 38 | The wavelet prosody analysis depends on several packages which are installed automatically if you 39 | use the procedure describe in `./INSTALL.rst `__. 
40 | 41 | Here are the main dependencies: 42 | 43 | - **pycwt** for the wavelet analysis (see https://github.com/regeirk/pycwt/LICENSE.txt ) 44 | - **pyyaml** for the configuration (see https://github.com/yaml/pyyaml/blob/master/LICENSE ) 45 | - **soundfile** for playing waves (see https://github.com/bastibe/SoundFile/blob/master/LICENSE ) 46 | - **wavio** for reading/writing wav (see https://github.com/WarrenWeckesser/wavio/blob/master/README.rst ) 47 | - **tgt** for reading/writing textgrid (see https://github.com/hbuschme/TextGridTools/blob/master/LICENSE ) 48 | - **pyqt5** for the gui (see https://www.riverbankcomputing.com/commercial/pyqt ) 49 | - **matplotlib** for the plot rendering (see https://github.com/matplotlib/matplotlib/blob/master/LICENSE/LICENSE ) 50 | 51 | Here the optional dependencies: 52 | 53 | - **pyreaper** for the f0 extraction (see https://github.com/r9y9/pyreaper/blob/master/LICENSE.md ). 54 | 55 | **The user is invited to have a look at the license of the dependencies.** 56 | 57 | Installation 58 | ------------ 59 | 60 | see `./INSTALL.rst `__ 61 | 62 | Input information 63 | ----------------- 64 | 65 | - audio files in wav format 66 | - transcriptions in either htk .lab format or Praat textgrids 67 | 68 | Usage: 69 | ------ 70 | 71 | 1. Assuming the installation process is done in **global mode**, just do 72 | 73 | .. code:: sh 74 | 75 | wavelet_gui 76 | 77 | Otherwise, go to the root directory of the program in the terminal, and start by 78 | 79 | .. code:: sh 80 | 81 | python3 wavelet_prosody_toolkit/wavelet_gui.py 82 | 83 | 84 | 2. Select directory with speech and transciption files: 85 | ``Select Speech Directory...``. Some examples are provided in 86 | ``samples/`` directory. Files should have the same root, for example 87 | file1.wav, file1.lab or file2.wav file2.TextGrid. 88 | 89 | 3. Select features to use in analysis: ``Prosodic Feats for CWT..`` 90 | 91 | 4. 
Adjust Pitch tracking parameters for the speaker / environment, press 92 | ``Reprocess`` to see changes Set range for possible pitch values, 93 | typically males ~50-350Hz, females ~100-400Hz. If estimated track 94 | skips obviously voiced portions, move voicing threshold slider left. 95 | 96 | - Alternatively, pre-estimated f0 analyses can be used: file .f0 must 97 | exist and it should be either in praat matrix format or as a list 98 | file with one f0 value / line, frame shift must be constant 5ms. To 99 | get suitable format from Praat, select wav and do: 100 | 101 | - To Pitch: 0.005, 120, 400 102 | - To Matrix 103 | - Save as matrix text file: “/.f0” 104 | 105 | 5. Adjust the weights of prosodic features and choose if the final 106 | signal is combined by summing or multiplying the features 107 | 108 | 6. Select which tiers to use for durations signal generation / use 109 | duration estimated from signal 110 | 111 | 7. Select transcription level of interest: ``Select Tier`` 112 | 113 | 8. You can interactively zoom and move around with the button on top, 114 | and play the visible section 115 | 116 | 9. When everything is good, you can ``Process all`` which analyzes all 117 | utterances in the directory with the current settings, and saves 118 | prosodic labels in the speech directory as ``.prom`` 119 | 120 | Prosodic labels are saved in a tab separated form with the following 121 | columns: 122 | 123 | .. code:: 124 | 125 | 126 | 127 | Advanced Usage: 128 | --------------- 129 | 130 | Additional customization of the input signals and wavelet analysis is possible by modifying the configuration file. The default configuration is located in: 131 | 132 | .. code:: sh 133 | 134 | wavelet_prosody_toolkit/configs/default.yaml 135 | 136 | You can view an online version here: https://github.com/asuni/wavelet_prosody_toolkit/blob/master/wavelet_prosody_toolkit/configs/default.yaml 137 | 138 | You are recommended to make a copy of the default.yaml file (to e.g. 
myconfig.yaml), and modify the copy. To apply the modified configuration, start the program by 139 | 140 | .. code:: sh 141 | 142 | wavelet_gui --config path/to/myconfig.yaml 143 | 144 | Some helpful shortcuts 145 | ---------------------- 146 | 147 | Here are a list of shortcuts available in the GUI: 148 | 149 | - **CTRL+q** to quit 150 | - **F11** to switch between fullscreen et normal mode 151 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = wavelet-prosody-toolkit 8 | SOURCEDIR = . 9 | BUILDDIR = ../build/doc 10 | 11 | 12 | 13 | # Put it first so that "make" without argument is like "make help". 14 | help: 15 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 16 | 17 | .PHONY: help Makefile 18 | 19 | # Catch-all target: route all unknown targets to Sphinx using the new 20 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
21 | %: Makefile 22 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 23 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # How to generate the documentation 2 | 3 | - extract the info from the source code 4 | ```sh 5 | sphinx-apidoc ../wavelet_prosody_toolkit -o _modules -e -M 6 | ``` 7 | - generate the html documentation 8 | ```sh 9 | make html 10 | ``` 11 | - documentation is generated in `../build/docs/html` 12 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # wavelet-prosody-toolkit documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Jan 9 14:55:15 2018. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
19 | # 20 | 21 | import sys 22 | import os 23 | sys.path.insert(0, os.path.abspath('../..')) 24 | 25 | # -- Fix non implicit call to sphinx-apidoc (see https://github.com/sphinx-doc/sphinx/issues/1861 ) 26 | import sphinx.apidoc 27 | 28 | def setup(app): 29 | """Helper to generate source code documentation 30 | """ 31 | sphinx.apidoc.main(['-f', '-T', '-e', '-o', 'doc/_modules', '../wavelet_prosody_toolkit/']) 32 | 33 | 34 | # -- General configuration ------------------------------------------------ 35 | 36 | # If your documentation needs a minimal Sphinx version, state it here. 37 | # 38 | # needs_sphinx = '1.0' 39 | 40 | # Add any Sphinx extension module names here, as strings. They can be 41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 42 | # ones. 43 | extensions = ['sphinx.ext.autodoc', 44 | 'sphinx.ext.autosummary', 45 | 'sphinx.ext.doctest', 46 | 'sphinx.ext.mathjax', 47 | 'sphinx.ext.ifconfig', 48 | 'sphinx.ext.viewcode', 49 | 'sphinx.ext.githubpages', 50 | 'sphinx.ext.napoleon', 51 | 'numpydoc'] 52 | 53 | # Add any paths that contain templates here, relative to this directory. 54 | templates_path = ['_templates'] 55 | 56 | # The suffix(es) of source filenames. 57 | # You can specify multiple suffix as a list of string: 58 | # 59 | source_parsers = { 60 | '.md': 'recommonmark.parser.CommonMarkParser', 61 | } 62 | source_suffix = ['.rst', '.md'] 63 | # source_suffix = '.rst' 64 | 65 | # The master toctree document. 66 | master_doc = 'index' 67 | 68 | # General information about the project. 69 | project = 'Wavelet prosody analysis toolkit' 70 | copyright = '2018, Antti Suni' 71 | author = 'Antti Suni' 72 | 73 | # The version info for the project you're documenting, acts as replacement for 74 | # |version| and |release|, also used in various other places throughout the 75 | # built documents. 76 | # 77 | # The short X.Y version. 78 | version = '0.1a' 79 | # The full version, including alpha/beta/rc tags. 
80 | release = '0.1' 81 | 82 | # The language for content autogenerated by Sphinx. Refer to documentation 83 | # for a list of supported languages. 84 | # 85 | # This is also used if you do content translation via gettext catalogs. 86 | # Usually you set "language" from the command line for these cases. 87 | language = None 88 | 89 | # List of patterns, relative to source directory, that match files and 90 | # directories to ignore when looking for source files. 91 | # This patterns also effect to html_static_path and html_extra_path 92 | exclude_patterns = ["README.md", "_modules/modules.rst"] 93 | 94 | # The name of the Pygments (syntax highlighting) style to use. 95 | pygments_style = 'sphinx' 96 | 97 | # If true, `todo` and `todoList` produce output, else they produce nothing. 98 | todo_include_todos = False 99 | 100 | 101 | # -- Options for HTML output ---------------------------------------------- 102 | 103 | # The theme to use for HTML and HTML Help pages. See the documentation for 104 | # a list of builtin themes. 105 | # 106 | html_theme = 'sphinx_rtd_theme' 107 | 108 | # Theme options are theme-specific and customize the look and feel of a theme 109 | # further. For a list of options available for each theme, see the 110 | # documentation. 111 | # 112 | # html_theme_options = {} 113 | 114 | # Add any paths that contain custom static files (such as style sheets) here, 115 | # relative to this directory. They are copied after the builtin static files, 116 | # so a file named "default.css" will overwrite the builtin "default.css". 117 | html_static_path = ['_static'] 118 | 119 | # Custom sidebar templates, must be a dictionary that maps document names 120 | # to template names. 
121 | # 122 | # This is required for the alabaster theme 123 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 124 | html_sidebars = { 125 | '**': [ 126 | 'relations.html', # needs 'show_related': True theme option to display 127 | 'searchbox.html', 128 | ] 129 | } 130 | 131 | 132 | # -- Options for HTMLHelp output ------------------------------------------ 133 | 134 | # Output file base name for HTML help builder. 135 | htmlhelp_basename = 'wavelet-prosody-toolkitdoc' 136 | 137 | 138 | # -- Options for LaTeX output --------------------------------------------- 139 | 140 | latex_elements = { 141 | # The paper size ('letterpaper' or 'a4paper'). 142 | # 143 | # 'papersize': 'letterpaper', 144 | 145 | # The font size ('10pt', '11pt' or '12pt'). 146 | # 147 | # 'pointsize': '10pt', 148 | 149 | # Additional stuff for the LaTeX preamble. 150 | # 151 | # 'preamble': '', 152 | 153 | # Latex figure (float) alignment 154 | # 155 | # 'figure_align': 'htbp', 156 | } 157 | 158 | # Grouping the document tree into LaTeX files. List of tuples 159 | # (source start file, target name, title, 160 | # author, documentclass [howto, manual, or own class]). 161 | latex_documents = [ 162 | (master_doc, 'wavelet-prosody-toolkit.tex', 'wavelet-prosody-toolkit Documentation', 163 | 'Antti Suni', 'manual'), 164 | ] 165 | 166 | 167 | # -- Options for manual page output --------------------------------------- 168 | 169 | # One entry per manual page. List of tuples 170 | # (source start file, name, description, authors, manual section). 171 | man_pages = [ 172 | (master_doc, 'wavelet-prosody-toolkit', 'wavelet-prosody-toolkit Documentation', 173 | [author], 1) 174 | ] 175 | 176 | 177 | # -- Options for Texinfo output ------------------------------------------- 178 | 179 | # Grouping the document tree into Texinfo files. 
List of tuples 180 | # (source start file, target name, title, author, 181 | # dir menu entry, description, category) 182 | texinfo_documents = [ 183 | (master_doc, 'wavelet-prosody-toolkit', 'wavelet-prosody-toolkit Documentation', 184 | author, 'wavelet-prosody-toolkit', 'One line description of project.', 185 | 'Miscellaneous'), 186 | ] 187 | 188 | # -- Options for Napoleon 189 | napoleon_google_docstring = True 190 | napoleon_numpy_docstring = True 191 | napoleon_include_init_with_doc = False 192 | napoleon_include_private_with_doc = False 193 | napoleon_include_special_with_doc = True 194 | napoleon_use_admonition_for_examples = False 195 | napoleon_use_admonition_for_notes = False 196 | napoleon_use_admonition_for_references = False 197 | napoleon_use_ivar = False 198 | napoleon_use_param = True 199 | napoleon_use_rtype = True 200 | 201 | numpydoc_show_inherited_class_members = False 202 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | 3 | API Documentation 4 | ----------------- 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | reference 10 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=PyCWT 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. 
Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /doc/reference.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | .. toctree:: 4 | :maxdepth: 3 5 | 6 | _modules/wavelet_prosody_toolkit 7 | _modules/wavelet_prosody_toolkit.prosody_tools 8 | -------------------------------------------------------------------------------- /img/analysis_synthesis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/img/analysis_synthesis.png -------------------------------------------------------------------------------- /img/global_spectrum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/img/global_spectrum.png -------------------------------------------------------------------------------- /img/prosody_labeller.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/img/prosody_labeller.png -------------------------------------------------------------------------------- /img/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/img/screenshot.png 
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | build-backend = "setuptools.build_meta" 3 | requires = [ 4 | "setuptools>=61", 5 | ] 6 | 7 | [project] 8 | name = "wavelet_prosody_toolkit" 9 | version = "1.0" 10 | authors = [ 11 | {name="Antti Suni", email="antti.suni@helsinki.fi"}, 12 | {name="Sébastien Le Maguer", email="sebastien.lemaguer@helsinki.fi"} 13 | ] 14 | description = "Prosody wavelet analysis toolkit" 15 | readme = {file="README.rst", content-type="text/x-rst"} 16 | classifiers = [ 17 | 'Development Status :: 4 - Beta', 18 | # Audience 19 | 'Intended Audience :: Science/Research', 20 | # Topics 21 | 'Topic :: Multimedia :: Sound/Audio :: Speech', 22 | 'Topic :: Scientific/Engineering :: Information Analysis', 23 | 'Topic :: Scientific/Engineering :: Visualization', 24 | # Pick your license as you wish 25 | 'License :: OSI Approved :: MIT License', 26 | # Python version (FIXME: fix the list of python version based on travis results) 27 | 'Programming Language :: Python :: 3', 28 | 'Programming Language :: Python :: 3.7', 29 | 'Programming Language :: Python :: 3.8', 30 | 'Programming Language :: Python :: 3.9', 31 | 'Programming Language :: Python :: 3.10', 32 | 'Programming Language :: Python :: 3.11', 33 | ] 34 | dependencies = [ 35 | "pyyaml", 36 | "pycwt", 37 | "numpy", 38 | "scipy", 39 | "soundfile", 40 | "tgt", 41 | "wavio", 42 | "joblib" 43 | ] 44 | 45 | [project.optional-dependencies] 46 | gui = ["pyqt5", "matplotlib"] 47 | reaper = ["pyreaper"] 48 | docs = ["sphinx", "sphinx_rtd_theme", "numpydoc"] 49 | full = [ 50 | "pyqt5", 51 | "matplotlib", 52 | "pyreaper", 53 | "sphinx", 54 | "sphinx_rtd_theme", 55 | "numpydoc" 56 | ] 57 | dev = ["pre-commit"] 58 | 59 | 60 | [project.scripts] 61 | prosody_labeller = "wavelet_prosody_toolkit.prosody_labeller:main" 62 | cwt_analysis_synthesis = 
"wavelet_prosody_toolkit.cwt_analysis_synthesis:main" 63 | wavelet_gui = "wavelet_prosody_toolkit.wavelet_gui:main" 64 | 65 | [project.urls] 66 | Homepage = "https://github.com/asuni/wavelet_prosody_toolkit" 67 | Issues = "https://github.com/asuni/wavelet_prosody_toolkit/issues" 68 | git = "https://github.com/asuni/wavelet_prosody_toolkit.git" 69 | 70 | [tool.setuptools] 71 | packages = ["wavelet_prosody_toolkit"] 72 | 73 | [tool.black] 74 | line-length = 120 75 | target-version = ['py311'] 76 | include = '\.pyi?$' 77 | exclude = ''' 78 | /( 79 | \.toml 80 | |\.sh 81 | |\.git 82 | |\.ini 83 | |Dockerfile 84 | |Jenkinfile 85 | )/ 86 | ''' 87 | 88 | [tool.flake8] 89 | max-line-length = 120 90 | 91 | [tool.basedpyright] 92 | typeCheckingMode = "standard" 93 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | 3 | build: 4 | image: latest 5 | 6 | python: 7 | version: 3.6 8 | pip_install: true 9 | extra_requirements: 10 | - docs 11 | -------------------------------------------------------------------------------- /samples/01l_fact_0001.lab: -------------------------------------------------------------------------------- 1 | 0 11200000 # !SIL 2 | 11200000 11700000 p puheen 3 | 11700000 12200000 u 4 | 12200000 12900000 h 5 | 12900000 13550000 e 6 | 13550000 14050000 e 7 | 14050000 14550000 n 8 | 14550000 14950000 t tutkimus 9 | 14950000 15700000 u 10 | 15700000 16300000 t 11 | 16300000 16950000 k 12 | 16950000 17300000 i 13 | 17300000 18100000 m 14 | 18100000 18750000 u 15 | 18750000 20150000 s 16 | 20150000 20950000 o on 17 | 20950000 21300000 n 18 | 21300000 21700000 j jo 19 | 21700000 22250000 o 20 | 22250000 22850000 l lähtökohdiltaan 21 | 22850000 23650000 A_ 22 | 23650000 23950000 h 23 | 23950000 24500000 t 24 | 24500000 25050000 O_ 25 | 25050000 25700000 k 26 | 25700000 26350000 o 27 | 26350000 26650000 h 28 | 
26650000 27450000 d 29 | 27450000 27900000 i 30 | 27900000 28200000 l 31 | 28200000 28850000 t 32 | 28850000 29300000 a 33 | 29300000 29750000 a 34 | 29750000 30250000 n 35 | 30250000 31050000 m monia 36 | 31050000 31800000 o 37 | 31800000 32200000 n 38 | 32200000 33150000 i 39 | 33150000 33800000 a 40 | 33800000 35050000 e eri 41 | 35050000 35300000 r 42 | 35300000 35800000 i 43 | 35800000 36600000 t tieteenaloja 44 | 36600000 37350000 i 45 | 37350000 38050000 e 46 | 38050000 38950000 t 47 | 38950000 39600000 e 48 | 39600000 39850000 e 49 | 39850000 40600000 n 50 | 40600000 41500000 a 51 | 41500000 41850000 l 52 | 41850000 42350000 o 53 | 42350000 42800000 j 54 | 42800000 43400000 a 55 | 43400000 44500000 k kiinnostava 56 | 44500000 44850000 i 57 | 44850000 45200000 i 58 | 45200000 45500000 n 59 | 45500000 45800000 n 60 | 45800000 46150000 o 61 | 46150000 46950000 s 62 | 46950000 47350000 t 63 | 47350000 47800000 a 64 | 47800000 48350000 v 65 | 48350000 48650000 a 66 | 48650000 49650000 t tutkimuskohde 67 | 49650000 50200000 u 68 | 50200000 50850000 t 69 | 50850000 51650000 k 70 | 51650000 52000000 i 71 | 52000000 52500000 m 72 | 52500000 53150000 u 73 | 53150000 53750000 s 74 | 53750000 54650000 k 75 | 54650000 55300000 o 76 | 55300000 56400000 h 77 | 56400000 57100000 d 78 | 57100000 57350000 e 79 | 57350000 60550000 # 80 | 60550000 62300000 # !SIL 81 | -------------------------------------------------------------------------------- /samples/01l_fact_0001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/samples/01l_fact_0001.wav -------------------------------------------------------------------------------- /samples/14m_proosa_0002_0002.lab: -------------------------------------------------------------------------------- 1 | 0 1900000 # !SIL 2 | 1900000 2550000 t täällä 3 | 2550000 2850000 A_ 4 | 2850000 3250000 A_ 5 | 
3250000 3500000 l 6 | 3500000 3750000 l 7 | 3750000 4000000 A_ 8 | 4000000 4250000 v voi 9 | 4250000 4500000 o 10 | 4500000 4750000 i 11 | 4750000 5050000 o olla 12 | 5050000 5350000 l 13 | 5350000 5600000 l 14 | 5600000 5850000 a 15 | 5850000 6500000 h helvetin 16 | 6500000 7000000 e 17 | 7000000 7750000 l 18 | 7750000 8000000 v 19 | 8000000 8450000 e 20 | 8450000 9350000 t 21 | 9350000 9750000 i 22 | 9750000 10000000 n 23 | 10000000 11100000 h hauskaa 24 | 11100000 11700000 a 25 | 11700000 12250000 u 26 | 12250000 13450000 s 27 | 13450000 13850000 k 28 | 13850000 14100000 a 29 | 14100000 14650000 a 30 | 14650000 15500000 k kun 31 | 15500000 15950000 u 32 | 15950000 16300000 n 33 | 16300000 16550000 v vain 34 | 16550000 16800000 a 35 | 16800000 17050000 i 36 | 17050000 17400000 n 37 | 17400000 18250000 s sattuu 38 | 18250000 18650000 a 39 | 18650000 19750000 t 40 | 19750000 20250000 t 41 | 20250000 20750000 u 42 | 20750000 21150000 u 43 | 21150000 21650000 h hyvään 44 | 21650000 22250000 y 45 | 22250000 22550000 v 46 | 22550000 22800000 A_ 47 | 22800000 23700000 A_ 48 | 23700000 24050000 n 49 | 24050000 25100000 s seuraan 50 | 25100000 26300000 e 51 | 26300000 27200000 u 52 | 27200000 27750000 r 53 | 27750000 28600000 a 54 | 28600000 29300000 a 55 | 29300000 29550000 n 56 | 29550000 31400000 # !SIL 57 | -------------------------------------------------------------------------------- /samples/14m_proosa_0002_0002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/samples/14m_proosa_0002_0002.wav -------------------------------------------------------------------------------- /samples/40_N1_C_kissankello.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 2.602675736961451 6 | tiers? 
7 | size = 5 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "sanat" 12 | xmin = 0 13 | xmax = 2.602675736961451 14 | intervals: size = 5 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 0.3813532988377878 18 | text = "" 19 | intervals [2]: 20 | xmin = 0.3813532988377878 21 | xmax = 1.373576792765878 22 | text = "S0" 23 | intervals [3]: 24 | xmin = 1.373576792765878 25 | xmax = 1.9027481016021124 26 | text = "S1" 27 | intervals [4]: 28 | xmin = 1.9027481016021124 29 | xmax = 2.2477643338446045 30 | text = "S2" 31 | intervals [5]: 32 | xmin = 2.2477643338446045 33 | xmax = 2.602675736961451 34 | text = "" 35 | item [2]: 36 | class = "IntervalTier" 37 | name = "tavut" 38 | xmin = 0 39 | xmax = 2.602675736961451 40 | intervals: size = 6 41 | intervals [1]: 42 | xmin = 0 43 | xmax = 1.373576792765878 44 | text = "" 45 | intervals [2]: 46 | xmin = 1.373576792765878 47 | xmax = 1.6765392363669516 48 | text = "T1" 49 | intervals [3]: 50 | xmin = 1.6765392363669516 51 | xmax = 1.9027481016021124 52 | text = "T2" 53 | intervals [4]: 54 | xmin = 1.9027481016021124 55 | xmax = 2.087091927631417 56 | text = "T3" 57 | intervals [5]: 58 | xmin = 2.087091927631417 59 | xmax = 2.2477643338446045 60 | text = "T4" 61 | intervals [6]: 62 | xmin = 2.2477643338446045 63 | xmax = 2.602675736961451 64 | text = "" 65 | item [3]: 66 | class = "IntervalTier" 67 | name = "vokaalit" 68 | xmin = 0 69 | xmax = 2.602675736961451 70 | intervals: size = 9 71 | intervals [1]: 72 | xmin = 0 73 | xmax = 1.4793689320369707 74 | text = "" 75 | intervals [2]: 76 | xmin = 1.4793689320369707 77 | xmax = 1.571320010157871 78 | text = "V1" 79 | intervals [3]: 80 | xmin = 1.571320010157871 81 | xmax = 1.7530452937140502 82 | text = "" 83 | intervals [4]: 84 | xmin = 1.7530452937140502 85 | xmax = 1.8182176368032497 86 | text = "V2" 87 | intervals [5]: 88 | xmin = 1.8182176368032497 89 | xmax = 1.9419381861997345 90 | text = "" 91 | intervals [6]: 92 | xmin = 1.9419381861997345 93 | xmax = 
2.0138858704032554 94 | text = "V3" 95 | intervals [7]: 96 | xmin = 2.0138858704032554 97 | xmax = 2.159362246891967 98 | text = "" 99 | intervals [8]: 100 | xmin = 2.159362246891967 101 | xmax = 2.2477643338446045 102 | text = "V4" 103 | intervals [9]: 104 | xmin = 2.2477643338446045 105 | xmax = 2.602675736961451 106 | text = "" 107 | item [4]: 108 | class = "IntervalTier" 109 | name = "narina" 110 | xmin = 0 111 | xmax = 2.602675736961451 112 | intervals: size = 1 113 | intervals [1]: 114 | xmin = 0 115 | xmax = 2.602675736961451 116 | text = "" 117 | item [5]: 118 | class = "TextTier" 119 | name = "f0" 120 | xmin = 0 121 | xmax = 2.602675736961451 122 | points: size = 0 123 | -------------------------------------------------------------------------------- /samples/40_N1_C_kissankello.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/samples/40_N1_C_kissankello.wav -------------------------------------------------------------------------------- /samples/8hz_4hz_1hz.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/samples/8hz_4hz_1hz.wav -------------------------------------------------------------------------------- /samples/kan_0001.F0: -------------------------------------------------------------------------------- 1 | "ooTextFile" 2 | "Matrix" 3 | 0 6.0808125000000004 1212 0.0050000000000000001 0.012906249999999951 4 | 1 1 1 1 1 5 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 228.36184485720696 231.61652757293712 232.94093729501785 227.51383368857464 216.33460906087706 215.27691244045562 221.11449612190302 
218.73749076716285 210.80632647608695 216.90325929629068 223.94384903673213 225.41841484548863 226.11846290406339 226.11954324729203 224.88139992790673 223.62215680378435 222.51631893962201 223.03075366773194 222.96915357418595 223.73152556085446 224.80609593772559 225.50824037978597 226.81830524546291 228.14501252280334 228.78948597152723 229.07246619767847 230.14815441738497 235.26130461363218 236.32073042144751 236.49450434786604 238.46095315396661 240.84128480245542 244.37167124825746 248.87864181690765 252.7494925271013 256.02256272410347 259.5448056257564 261.76819010465573 263.64089652720781 267.16528052662721 270.36663728844292 272.30709486212839 274.06445940191497 278.16646409328678 283.03631150640149 286.33032571766074 289.32893079773515 292.74926085366747 294.47530691114048 296.20412557514214 299.77653491613984 301.34640298776617 300.72265065023493 300.57117180343602 300.54322241784689 300.87812316367229 296.91149071872832 288.47422347060598 271.90791284510249 259.16664198247071 263.5841298973549 270.99517054967697 277.56980539095446 284.69226936936599 292.39080752485023 298.14893975006152 300.64830326945861 300.535308712438 298.21337588807489 293.82945415167768 290.05836157813604 287.18566670309002 282.38276796082596 262.67393106580749 248.17949308134499 243.58330436526913 234.58666038254651 217.08028849022514 264.77749876705622 283.75529823726305 295.29117936685725 271.2658696840985 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 198.9937151583442 195.82911314957175 191.24619646152317 186.55121990123757 183.93790608393667 183.24964570276398 182.58674142580443 181.33201713840566 181.82060769563449 182.15963314579494 183.26864823387922 182.91813760591586 182.45025384584639 182.74787693489233 182.15209334520125 181.20248653105787 181.64176296643251 182.75239234491841 182.8105252313172 182.61521716020141 183.11764202454464 183.88269552193916 186.6996283750409 190.01788429522395 192.04589741202062 194.47419919231078 198.40352839749826 204.08051172009024 
207.15371998349769 212.90950156591094 216.39833239198393 216.70095096061533 230.47866350865178 234.55203378343182 234.72607756768389 226.95416182777086 261.61467344386904 273.77354649941964 268.10104244142565 258.44961985926363 259.5985309297489 276.08290840581191 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 238.73121837536968 231.65038175750107 227.00387059504189 218.55191493399556 212.813954226091 210.24313391777324 205.81156815827504 202.60817605765246 203.94956257286449 204.02247074006755 203.84687380119342 203.20810048697689 204.04584283404384 204.27213085035942 205.01051493569597 205.817153331632 206.93546010922626 210.70573458494977 214.00031103248378 215.94306869576735 218.44177530908811 219.7513254354416 221.57667262730408 222.71534374008451 222.39911569945374 222.2283197929068 222.99633068386871 223.23696243738738 223.91728945682996 224.31623142771136 226.4473588674978 229.20485949439603 230.131810794568 232.71389385289589 234.91434941444564 236.06569070482206 238.6461295490025 241.39250595463048 243.48809413261748 247.88868845252503 250.41994952660056 251.16545021663009 252.43215107592613 252.16637373750152 252.40989363141915 252.40615696983519 252.76927739314408 253.34286593112574 254.87372095819572 258.81715962176588 264.29701297096096 269.57150425336033 273.93073262777796 275.65352956053607 275.57767606915252 276.07298001511361 277.09045304728744 278.05862244527884 278.80239755104031 279.46087469795668 279.96428544241837 278.73668143065146 277.10602855821747 275.267213684946 270.71440548051459 261.25729813874545 250.67082887829321 251.74147179721723 261.93192397712158 265.56461357487308 268.31719701572075 269.2463986241122 270.29040468318146 269.96070474173581 268.96009005448661 264.53472984609004 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 210.57779289837967 208.72200414895624 200.98758135492108 198.0892655572633 
194.36902460857104 190.26772264367031 190.53710083685701 186.78475168693097 184.17419795629763 188.18261791415173 184.04517616078388 186.02976516762115 188.29901376666066 189.51076081203095 191.62248211585495 194.36706569453969 192.74401928457456 0 0 0 0 0 242.76956307244018 245.34296625486704 236.4593619292514 230.06892883318261 230.56880286244373 246.41440331914549 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 222.9773632188253 223.21560494756801 224.68006384268512 221.48185355999314 213.96882598904384 209.50588864003004 207.73562464175612 207.36391135453215 207.34667993749852 205.46253163203122 205.49317983442677 206.58259237278179 207.41174911006925 208.33644940205383 210.45806255817129 212.1271723390411 213.43304309411067 215.77599217722852 217.32825076801041 218.73018646892262 220.77372374500874 223.50874573647096 225.28445301587826 225.78586128346015 226.75681531299375 229.18509387962081 230.71369488860807 231.65744150710364 232.08883363375602 230.12881235918394 226.74688711060483 224.03602004015991 224.56770916465351 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 250.72415528073927 235.64345150821453 237.88962216356083 244.85877412818601 250.83363745289651 254.46671785519914 257.6102773584974 260.22376197805187 261.15932330660883 261.60156788006469 262.55281277174538 260.74378523617111 259.56357193214387 258.21892944587137 254.43961262710158 249.62001237593793 246.15549992812367 244.49906211565482 238.69522394781504 236.06780100937149 236.35362419753531 233.24196021019705 231.98236402530358 233.66758793248553 232.24441063254366 230.06784980811693 230.54524894551832 230.76548046114473 230.00538569147815 230.05031487502833 229.48008675848956 228.1879421579261 226.57687380235529 225.51335844587928 225.275881713904 224.59384441979793 224.88847859650778 222.88164326422691 219.94298326294577 218.6976195683113 214.37801341920294 209.52065521341018 207.11300405401644 
206.72809591393204 205.51812662856528 205.29733315493471 204.63974107730769 203.81440196851472 205.84394900680562 211.6695943612834 209.98221386329209 209.51272611029225 211.4758549299946 213.45660007028457 214.34010578091304 215.1969415637094 215.74168256218439 215.82024173143029 216.08637434019025 216.83700189350901 218.17647520585606 220.22870507113396 221.66948390212221 222.36652896972709 223.50259880963355 224.43233943941539 226.42185047368241 228.37813164181213 229.27643829583494 230.67273145621482 232.42982156985428 233.89256102656611 237.51360274036347 235.58744598339095 235.7061229212475 239.50880328850968 244.7050760102301 251.62676051743816 244.41235544486986 242.74041677023084 243.43034456053374 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 234.33011593775751 230.55758287210898 227.82790300891691 224.70855073399983 221.05060056992482 216.51557319954949 214.08846724939244 212.01444110917797 210.38417950696766 211.08960178934507 209.58586151700419 204.07276996040835 202.61117457980203 207.24036923786772 211.82686256218227 212.84519828917971 212.97687099568881 214.09143183527522 215.56193619437525 216.30317822772216 218.77319334891112 220.64943170143084 224.03417076130137 226.26463711529314 227.71292891189745 228.92478463087409 231.39963105256936 233.35726587383138 235.18184339568458 238.175655199673 240.9226026984899 241.69026758275251 242.23781436807914 243.761322830925 242.80998784908019 240.21811736137892 235.07547377361672 230.80524061841385 227.80249763147663 228.21379110285443 0 0 0 0 0 0 0 0 292.09856348586072 290.46298899787615 290.71949143530225 292.35433621454075 292.30523678880445 291.74070029277243 290.78919541757818 290.84386736590426 291.10656583730429 288.99131976717416 288.5427966773857 290.34976754415413 290.12116122964653 290.55055926258035 291.34236233693804 290.88460869133559 289.20848658844903 286.93040799204937 
282.55166910675905 262.91343080335326 251.39631081093097 239.15288547449609 221.65823166884323 222.4357525031954 221.96563281722302 212.47245441603911 209.3598698358505 210.71266660858771 206.93728472628271 206.05392812396502 224.56283402146508 239.72921973403791 244.27738688146906 249.58018815440261 243.19343735120745 238.09182243902595 233.92008214255333 230.91560724322798 227.780030579731 224.7263024707342 220.8377443704872 215.46891513625621 212.64780397297983 212.09226178608077 211.16518908048721 209.9662470394436 209.87602634910405 210.21883186948017 210.52424778129404 209.96465801539696 208.0709375121759 204.88624094713032 202.53144517375245 200.70418959125254 198.2048343808641 194.47628647781013 191.57540462581372 192.17638647270556 192.63457664151304 194.20891298977676 194.31854441066733 192.73015442924904 191.55708354452983 191.54400536020381 190.88775073286357 189.99455805631615 189.76488940445043 188.31604276636881 188.26620167826962 187.17701496709407 185.38257586939426 180.00071090430069 177.94143219647739 183.77511957738051 184.15213238004824 183.65681781893946 182.25731303302294 180.85217155761882 178.9063537120779 179.93393109888748 179.08979596280119 180.1556316806236 181.33000258865738 181.12465687877568 184.29970195297872 184.65679882426676 183.74038292792903 185.52442547496713 187.5800619014924 188.88388428753737 192.98650542741819 195.57022884466747 198.4434574842698 200.97658357697313 203.23726555571636 204.0306757615939 206.54848695209168 205.62494962780724 212.06920101870182 209.52108181320025 209.2081275810736 198.71541206943812 199.67671883143365 204.62827356504823 222.19733703222693 200.88818743081967 203.21649986166059 203.06897608803314 210.68770645195698 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 222.50616965196218 205.9044392660463 198.05767052561851 193.61783531783493 191.01914036991244 188.38185587157582 185.12928185457218 180.96706679447604 181.29083948756488 180.47383348602852 179.77796244633154 
179.13761775961137 180.87477148543988 193.5842746500789 221.51539336596508 219.2785971038868 216.56324152596497 215.70260531999457 213.66842374328701 209.48540964747329 208.02686883732582 207.20686693276204 206.64609470240188 208.11476684406546 209.79753325640499 210.77365482092569 210.24690926752032 208.86508723755199 205.99417207562809 203.42612739419141 202.06836372052888 201.08683839660313 202.6100708282317 203.77899261439697 203.90929176901111 203.73174494617857 204.25618849129413 205.19684306502194 205.10897248510665 204.49918300377468 207.80124634822366 209.48305921237784 206.93850674987567 206.89584234768796 207.75732328416248 207.93578837639419 206.64798401544473 205.37830542099522 204.0562171312844 203.59874277208991 202.61818742926718 202.12261707704059 202.82540556897882 202.21539407492276 202.41928537520073 203.6591913423625 204.16255074366478 203.59701602994809 203.93103996184141 206.80563311048212 207.63277664533754 207.3490117254114 208.6546475064082 208.55836742325991 211.79226896002973 212.97561328270854 212.29601377471761 211.47474432605878 213.88754613273028 213.89325885022939 211.37505611082847 209.79052370641114 210.31143010312297 210.88610831238142 207.64206301603565 205.78184809705527 205.42632932067573 168.93139437979798 168.95761281518313 170.2954996953361 171.80231954531038 209.96830939010982 0 0 0 0 0 0 0 0 0 0 0 0 233.93532122674213 227.61770669533681 214.59183701168834 210.54902741478102 211.23354071712194 211.94187209693339 210.49350093663716 207.94907881245379 205.39735829109333 201.70287027150908 199.14735095838074 196.9810894495607 192.34386595299893 191.04471313030561 189.46278851079839 184.24574920643693 182.01497709598524 173.87066442878151 168.44155642067741 179.80545787974154 203.55263360364185 194.61999298053954 201.27551758731889 197.22315915448334 198.13621794660938 157.84543411266995 164.17185067327807 171.06064875846695 169.8817412457569 169.69480530493968 169.52609055000278 164.92676799776379 168.00841100783254 
167.16039501885416 165.85202907101646 164.66857822110916 164.21509940271727 162.21061364082527 162.26883145090653 162.06494736479144 162.44864690276867 162.63332040640779 162.78820004943151 163.04256934411885 161.24311613025844 161.92398237885558 161.32044757540996 162.19415760274464 165.25750865846874 164.17151173872642 165.42156381092249 163.91085375276634 167.53511500488051 170.61114555801888 164.31902939328563 161.87365404885935 157.17393385493168 157.73938681824654 157.50935883358616 152.9724434977534 156.00014448517265 152.24785298001075 145.78832996357627 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 | -------------------------------------------------------------------------------- /samples/kan_0001.lab: -------------------------------------------------------------------------------- 1 | 0 5600000 sil _UTTEND_ 2 | 5600000 5950000 a ameirika 3 | 5950000 7050000 m 4 | 7050000 8150000 e 5 | 8150000 8400000 i 6 | 8400000 8700000 r 7 | 8700000 9800000 i 8 | 9800000 10550000 k 9 | 10550000 13000000 a 10 | 13000000 14700000 sil _SPACE_ 11 | 14700000 16500000 s sannyukta 12 | 16500000 17250000 a 13 | 17250000 18300000 n 14 | 18300000 18600000 n 15 | 18600000 19450000 y 16 | 19450000 20500000 u 17 | 20500000 20650000 k 18 | 20650000 22000000 t 19 | 22000000 23700000 a 20 | 23700000 25050000 sil _SPACE_ 21 | 25050000 26600000 s sannsthaana 22 | 26600000 27100000 a 23 | 27100000 27350000 n 24 | 27350000 28050000 n 25 | 28050000 29150000 s 26 | 29150000 29850000 t 27 | 29850000 30500000 h 28 | 30500000 31450000 a 29 | 31450000 31650000 a 30 | 31650000 32450000 n 31 | 32450000 34050000 a 32 | 34050000 36950000 sil _SPACE_ 33 | 36950000 37800000 s sannvidhaana 34 | 37800000 38400000 a 35 | 38400000 38800000 n 36 | 38800000 39900000 n 37 | 39900000 40050000 v 38 | 40050000 41150000 i 39 | 41150000 
41850000 d 40 | 41850000 42000000 h 41 | 42000000 42200000 a 42 | 42200000 43500000 a 43 | 43500000 44600000 n 44 | 44600000 46150000 a 45 | 46150000 47500000 sil _SPACE_ 46 | 47500000 48100000 d dinaacharand 47 | 48100000 48850000 i 48 | 48850000 49600000 n 49 | 49600000 50550000 a 50 | 50550000 51450000 a 51 | 51450000 52000000 c 52 | 52000000 52550000 h 53 | 52550000 52800000 a 54 | 52800000 53500000 r 55 | 53500000 53750000 a 56 | 53750000 54150000 n 57 | 54150000 54750000 d 58 | 54750000 54750000 skip _TILDE_ 59 | 54750000 55700000 e e 60 | 55700000 60750000 sil _UTTEND_ 61 | -------------------------------------------------------------------------------- /samples/kan_0001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/samples/kan_0001.wav -------------------------------------------------------------------------------- /samples/libritts/7127_75947_000010_000000.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0.0 5 | xmax = 5.1 6 | tiers? 
7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "words" 12 | xmin = 0.0 13 | xmax = 5.1 14 | intervals: size = 17 15 | intervals [1]: 16 | xmin = 0.000 17 | xmax = 0.680 18 | text = "yes" 19 | intervals [2]: 20 | xmin = 0.680 21 | xmax = 0.750 22 | text = "" 23 | intervals [3]: 24 | xmin = 0.750 25 | xmax = 0.890 26 | text = "the" 27 | intervals [4]: 28 | xmin = 0.890 29 | xmax = 1.450 30 | text = "character" 31 | intervals [5]: 32 | xmin = 1.450 33 | xmax = 1.670 34 | text = "which" 35 | intervals [6]: 36 | xmin = 1.670 37 | xmax = 1.880 38 | text = "your" 39 | intervals [7]: 40 | xmin = 1.880 41 | xmax = 2.250 42 | text = "royal" 43 | intervals [8]: 44 | xmin = 2.250 45 | xmax = 2.660 46 | text = "highness" 47 | intervals [9]: 48 | xmin = 2.660 49 | xmax = 3.190 50 | text = "assumed" 51 | intervals [10]: 52 | xmin = 3.190 53 | xmax = 3.380 54 | text = "is" 55 | intervals [11]: 56 | xmin = 3.380 57 | xmax = 3.520 58 | text = "in" 59 | intervals [12]: 60 | xmin = 3.520 61 | xmax = 3.960 62 | text = "perfect" 63 | intervals [13]: 64 | xmin = 3.960 65 | xmax = 4.380 66 | text = "harmony" 67 | intervals [14]: 68 | xmin = 4.380 69 | xmax = 4.550 70 | text = "with" 71 | intervals [15]: 72 | xmin = 4.550 73 | xmax = 4.710 74 | text = "your" 75 | intervals [16]: 76 | xmin = 4.710 77 | xmax = 5.080 78 | text = "own" 79 | intervals [17]: 80 | xmin = 5.080 81 | xmax = 5.1 82 | text = "" 83 | item [2]: 84 | class = "IntervalTier" 85 | name = "phones" 86 | xmin = 0.0 87 | xmax = 5.1 88 | intervals: size = 59 89 | intervals [1]: 90 | xmin = 0.000 91 | xmax = 0.170 92 | text = "Y" 93 | intervals [2]: 94 | xmin = 0.170 95 | xmax = 0.400 96 | text = "EH1" 97 | intervals [3]: 98 | xmin = 0.400 99 | xmax = 0.680 100 | text = "S" 101 | intervals [4]: 102 | xmin = 0.680 103 | xmax = 0.750 104 | text = "sp" 105 | intervals [5]: 106 | xmin = 0.750 107 | xmax = 0.810 108 | text = "DH" 109 | intervals [6]: 110 | xmin = 0.810 111 | xmax = 0.890 112 | text = 
"AH0" 113 | intervals [7]: 114 | xmin = 0.890 115 | xmax = 1.030 116 | text = "K" 117 | intervals [8]: 118 | xmin = 1.030 119 | xmax = 1.080 120 | text = "EH1" 121 | intervals [9]: 122 | xmin = 1.080 123 | xmax = 1.180 124 | text = "R" 125 | intervals [10]: 126 | xmin = 1.180 127 | xmax = 1.240 128 | text = "IH0" 129 | intervals [11]: 130 | xmin = 1.240 131 | xmax = 1.280 132 | text = "K" 133 | intervals [12]: 134 | xmin = 1.280 135 | xmax = 1.350 136 | text = "T" 137 | intervals [13]: 138 | xmin = 1.350 139 | xmax = 1.450 140 | text = "ER0" 141 | intervals [14]: 142 | xmin = 1.450 143 | xmax = 1.510 144 | text = "W" 145 | intervals [15]: 146 | xmin = 1.510 147 | xmax = 1.570 148 | text = "IH1" 149 | intervals [16]: 150 | xmin = 1.570 151 | xmax = 1.670 152 | text = "CH" 153 | intervals [17]: 154 | xmin = 1.670 155 | xmax = 1.700 156 | text = "Y" 157 | intervals [18]: 158 | xmin = 1.700 159 | xmax = 1.790 160 | text = "AO1" 161 | intervals [19]: 162 | xmin = 1.790 163 | xmax = 1.880 164 | text = "R" 165 | intervals [20]: 166 | xmin = 1.880 167 | xmax = 2.020 168 | text = "R" 169 | intervals [21]: 170 | xmin = 2.020 171 | xmax = 2.140 172 | text = "OY1" 173 | intervals [22]: 174 | xmin = 2.140 175 | xmax = 2.180 176 | text = "AH0" 177 | intervals [23]: 178 | xmin = 2.180 179 | xmax = 2.250 180 | text = "L" 181 | intervals [24]: 182 | xmin = 2.250 183 | xmax = 2.330 184 | text = "HH" 185 | intervals [25]: 186 | xmin = 2.330 187 | xmax = 2.440 188 | text = "AY1" 189 | intervals [26]: 190 | xmin = 2.440 191 | xmax = 2.480 192 | text = "N" 193 | intervals [27]: 194 | xmin = 2.480 195 | xmax = 2.570 196 | text = "AH0" 197 | intervals [28]: 198 | xmin = 2.570 199 | xmax = 2.660 200 | text = "S" 201 | intervals [29]: 202 | xmin = 2.660 203 | xmax = 2.740 204 | text = "AH0" 205 | intervals [30]: 206 | xmin = 2.740 207 | xmax = 2.890 208 | text = "S" 209 | intervals [31]: 210 | xmin = 2.890 211 | xmax = 3.080 212 | text = "UW1" 213 | intervals [32]: 214 | xmin = 3.080 215 | 
xmax = 3.150 216 | text = "M" 217 | intervals [33]: 218 | xmin = 3.150 219 | xmax = 3.190 220 | text = "D" 221 | intervals [34]: 222 | xmin = 3.190 223 | xmax = 3.300 224 | text = "IH1" 225 | intervals [35]: 226 | xmin = 3.300 227 | xmax = 3.380 228 | text = "Z" 229 | intervals [36]: 230 | xmin = 3.380 231 | xmax = 3.450 232 | text = "IH0" 233 | intervals [37]: 234 | xmin = 3.450 235 | xmax = 3.520 236 | text = "N" 237 | intervals [38]: 238 | xmin = 3.520 239 | xmax = 3.670 240 | text = "P" 241 | intervals [39]: 242 | xmin = 3.670 243 | xmax = 3.740 244 | text = "ER1" 245 | intervals [40]: 246 | xmin = 3.740 247 | xmax = 3.820 248 | text = "F" 249 | intervals [41]: 250 | xmin = 3.820 251 | xmax = 3.870 252 | text = "IH2" 253 | intervals [42]: 254 | xmin = 3.870 255 | xmax = 3.920 256 | text = "K" 257 | intervals [43]: 258 | xmin = 3.920 259 | xmax = 3.960 260 | text = "T" 261 | intervals [44]: 262 | xmin = 3.960 263 | xmax = 4.030 264 | text = "HH" 265 | intervals [45]: 266 | xmin = 4.030 267 | xmax = 4.070 268 | text = "AA1" 269 | intervals [46]: 270 | xmin = 4.070 271 | xmax = 4.150 272 | text = "R" 273 | intervals [47]: 274 | xmin = 4.150 275 | xmax = 4.200 276 | text = "M" 277 | intervals [48]: 278 | xmin = 4.200 279 | xmax = 4.240 280 | text = "AH0" 281 | intervals [49]: 282 | xmin = 4.240 283 | xmax = 4.290 284 | text = "N" 285 | intervals [50]: 286 | xmin = 4.290 287 | xmax = 4.380 288 | text = "IY0" 289 | intervals [51]: 290 | xmin = 4.380 291 | xmax = 4.430 292 | text = "W" 293 | intervals [52]: 294 | xmin = 4.430 295 | xmax = 4.470 296 | text = "IH0" 297 | intervals [53]: 298 | xmin = 4.470 299 | xmax = 4.550 300 | text = "TH" 301 | intervals [54]: 302 | xmin = 4.550 303 | xmax = 4.590 304 | text = "Y" 305 | intervals [55]: 306 | xmin = 4.590 307 | xmax = 4.620 308 | text = "UH1" 309 | intervals [56]: 310 | xmin = 4.620 311 | xmax = 4.710 312 | text = "R" 313 | intervals [57]: 314 | xmin = 4.710 315 | xmax = 4.890 316 | text = "OW1" 317 | intervals [58]: 
318 | xmin = 4.890 319 | xmax = 5.080 320 | text = "N" 321 | intervals [59]: 322 | xmin = 5.080 323 | xmax = 5.1 324 | text = "" 325 | -------------------------------------------------------------------------------- /samples/libritts/7127_75947_000010_000000.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/samples/libritts/7127_75947_000010_000000.wav -------------------------------------------------------------------------------- /samples/libritts/LJ050-0276.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0.0 5 | xmax = 8.563673469387755 6 | tiers? 7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "words" 12 | xmin = 0.0 13 | xmax = 8.563673469387755 14 | intervals: size = 26 15 | intervals [1]: 16 | xmin = 0.000 17 | xmax = 0.180 18 | text = "as" 19 | intervals [2]: 20 | xmin = 0.180 21 | xmax = 0.460 22 | text = "has" 23 | intervals [3]: 24 | xmin = 0.460 25 | xmax = 0.660 26 | text = "been" 27 | intervals [4]: 28 | xmin = 0.660 29 | xmax = 1.070 30 | text = "pointed" 31 | intervals [5]: 32 | xmin = 1.070 33 | xmax = 1.430 34 | text = "out" 35 | intervals [6]: 36 | xmin = 1.430 37 | xmax = 1.790 38 | text = "" 39 | intervals [7]: 40 | xmin = 1.790 41 | xmax = 1.900 42 | text = "the" 43 | intervals [8]: 44 | xmin = 1.900 45 | xmax = 2.330 46 | text = "commission" 47 | intervals [9]: 48 | xmin = 2.330 49 | xmax = 2.510 50 | text = "has" 51 | intervals [10]: 52 | xmin = 2.510 53 | xmax = 2.780 54 | text = "not" 55 | intervals [11]: 56 | xmin = 2.780 57 | xmax = 3.350 58 | text = "resolved" 59 | intervals [12]: 60 | xmin = 3.350 61 | xmax = 3.580 62 | text = "all" 63 | intervals [13]: 64 | xmin = 3.580 65 | xmax = 3.690 66 | text = "the" 67 | intervals [14]: 68 | xmin = 3.690 69 | 
xmax = 4.420 70 | text = "proposals" 71 | intervals [15]: 72 | xmin = 4.420 73 | xmax = 4.650 74 | text = "which" 75 | intervals [16]: 76 | xmin = 4.650 77 | xmax = 4.810 78 | text = "could" 79 | intervals [17]: 80 | xmin = 4.810 81 | xmax = 4.970 82 | text = "be" 83 | intervals [18]: 84 | xmin = 4.970 85 | xmax = 5.450 86 | text = "made" 87 | intervals [19]: 88 | xmin = 5.450 89 | xmax = 6.150 90 | text = "" 91 | intervals [20]: 92 | xmin = 6.150 93 | xmax = 6.250 94 | text = "the" 95 | intervals [21]: 96 | xmin = 6.250 97 | xmax = 6.740 98 | text = "commission" 99 | intervals [22]: 100 | xmin = 6.740 101 | xmax = 7.370 102 | text = "nevertheless" 103 | intervals [23]: 104 | xmin = 7.370 105 | xmax = 7.510 106 | text = "is" 107 | intervals [24]: 108 | xmin = 7.510 109 | xmax = 8.180 110 | text = "confident" 111 | intervals [25]: 112 | xmin = 8.180 113 | xmax = 8.470 114 | text = "that" 115 | intervals [26]: 116 | xmin = 8.470 117 | xmax = 8.563673469387755 118 | text = "" 119 | item [2]: 120 | class = "IntervalTier" 121 | name = "phones" 122 | xmin = 0.0 123 | xmax = 8.563673469387755 124 | intervals: size = 97 125 | intervals [1]: 126 | xmin = 0.000 127 | xmax = 0.110 128 | text = "ae" 129 | intervals [2]: 130 | xmin = 0.110 131 | xmax = 0.180 132 | text = "z" 133 | intervals [3]: 134 | xmin = 0.180 135 | xmax = 0.270 136 | text = "hh" 137 | intervals [4]: 138 | xmin = 0.270 139 | xmax = 0.370 140 | text = "ae" 141 | intervals [5]: 142 | xmin = 0.370 143 | xmax = 0.460 144 | text = "z" 145 | intervals [6]: 146 | xmin = 0.460 147 | xmax = 0.510 148 | text = "b" 149 | intervals [7]: 150 | xmin = 0.510 151 | xmax = 0.580 152 | text = "ih" 153 | intervals [8]: 154 | xmin = 0.580 155 | xmax = 0.660 156 | text = "n" 157 | intervals [9]: 158 | xmin = 0.660 159 | xmax = 0.740 160 | text = "p" 161 | intervals [10]: 162 | xmin = 0.740 163 | xmax = 0.870 164 | text = "oy" 165 | intervals [11]: 166 | xmin = 0.870 167 | xmax = 0.900 168 | text = "n" 169 | intervals [12]: 170 
| xmin = 0.900 171 | xmax = 0.960 172 | text = "t" 173 | intervals [13]: 174 | xmin = 0.960 175 | xmax = 0.990 176 | text = "ax" 177 | intervals [14]: 178 | xmin = 0.990 179 | xmax = 1.070 180 | text = "d" 181 | intervals [15]: 182 | xmin = 1.070 183 | xmax = 1.340 184 | text = "aw" 185 | intervals [16]: 186 | xmin = 1.340 187 | xmax = 1.430 188 | text = "t" 189 | intervals [17]: 190 | xmin = 1.430 191 | xmax = 1.790 192 | text = "sp" 193 | intervals [18]: 194 | xmin = 1.790 195 | xmax = 1.840 196 | text = "dh" 197 | intervals [19]: 198 | xmin = 1.840 199 | xmax = 1.900 200 | text = "ax" 201 | intervals [20]: 202 | xmin = 1.900 203 | xmax = 1.980 204 | text = "k" 205 | intervals [21]: 206 | xmin = 1.980 207 | xmax = 2.020 208 | text = "ax" 209 | intervals [22]: 210 | xmin = 2.020 211 | xmax = 2.080 212 | text = "m" 213 | intervals [23]: 214 | xmin = 2.080 215 | xmax = 2.140 216 | text = "ih" 217 | intervals [24]: 218 | xmin = 2.140 219 | xmax = 2.240 220 | text = "sh" 221 | intervals [25]: 222 | xmin = 2.240 223 | xmax = 2.280 224 | text = "ax" 225 | intervals [26]: 226 | xmin = 2.280 227 | xmax = 2.330 228 | text = "n" 229 | intervals [27]: 230 | xmin = 2.330 231 | xmax = 2.400 232 | text = "hh" 233 | intervals [28]: 234 | xmin = 2.400 235 | xmax = 2.440 236 | text = "ax" 237 | intervals [29]: 238 | xmin = 2.440 239 | xmax = 2.510 240 | text = "z" 241 | intervals [30]: 242 | xmin = 2.510 243 | xmax = 2.590 244 | text = "n" 245 | intervals [31]: 246 | xmin = 2.590 247 | xmax = 2.740 248 | text = "aa" 249 | intervals [32]: 250 | xmin = 2.740 251 | xmax = 2.780 252 | text = "t" 253 | intervals [33]: 254 | xmin = 2.780 255 | xmax = 2.840 256 | text = "r" 257 | intervals [34]: 258 | xmin = 2.840 259 | xmax = 2.870 260 | text = "iy" 261 | intervals [35]: 262 | xmin = 2.870 263 | xmax = 2.970 264 | text = "z" 265 | intervals [36]: 266 | xmin = 2.970 267 | xmax = 3.140 268 | text = "aa" 269 | intervals [37]: 270 | xmin = 3.140 271 | xmax = 3.220 272 | text = "l" 273 | 
intervals [38]: 274 | xmin = 3.220 275 | xmax = 3.270 276 | text = "v" 277 | intervals [39]: 278 | xmin = 3.270 279 | xmax = 3.350 280 | text = "d" 281 | intervals [40]: 282 | xmin = 3.350 283 | xmax = 3.520 284 | text = "ao" 285 | intervals [41]: 286 | xmin = 3.520 287 | xmax = 3.580 288 | text = "l" 289 | intervals [42]: 290 | xmin = 3.580 291 | xmax = 3.630 292 | text = "dh" 293 | intervals [43]: 294 | xmin = 3.630 295 | xmax = 3.690 296 | text = "ax" 297 | intervals [44]: 298 | xmin = 3.690 299 | xmax = 3.750 300 | text = "p" 301 | intervals [45]: 302 | xmin = 3.750 303 | xmax = 3.780 304 | text = "r" 305 | intervals [46]: 306 | xmin = 3.780 307 | xmax = 3.820 308 | text = "ax" 309 | intervals [47]: 310 | xmin = 3.820 311 | xmax = 3.940 312 | text = "p" 313 | intervals [48]: 314 | xmin = 3.940 315 | xmax = 4.100 316 | text = "ow" 317 | intervals [49]: 318 | xmin = 4.100 319 | xmax = 4.170 320 | text = "z" 321 | intervals [50]: 322 | xmin = 4.170 323 | xmax = 4.200 324 | text = "ax" 325 | intervals [51]: 326 | xmin = 4.200 327 | xmax = 4.310 328 | text = "l" 329 | intervals [52]: 330 | xmin = 4.310 331 | xmax = 4.420 332 | text = "z" 333 | intervals [53]: 334 | xmin = 4.420 335 | xmax = 4.490 336 | text = "w" 337 | intervals [54]: 338 | xmin = 4.490 339 | xmax = 4.550 340 | text = "ih" 341 | intervals [55]: 342 | xmin = 4.550 343 | xmax = 4.650 344 | text = "ch" 345 | intervals [56]: 346 | xmin = 4.650 347 | xmax = 4.720 348 | text = "k" 349 | intervals [57]: 350 | xmin = 4.720 351 | xmax = 4.780 352 | text = "uh" 353 | intervals [58]: 354 | xmin = 4.780 355 | xmax = 4.810 356 | text = "d" 357 | intervals [59]: 358 | xmin = 4.810 359 | xmax = 4.870 360 | text = "b" 361 | intervals [60]: 362 | xmin = 4.870 363 | xmax = 4.970 364 | text = "iy" 365 | intervals [61]: 366 | xmin = 4.970 367 | xmax = 5.060 368 | text = "m" 369 | intervals [62]: 370 | xmin = 5.060 371 | xmax = 5.360 372 | text = "ey" 373 | intervals [63]: 374 | xmin = 5.360 375 | xmax = 5.450 376 | 
text = "d" 377 | intervals [64]: 378 | xmin = 5.450 379 | xmax = 6.150 380 | text = "sp" 381 | intervals [65]: 382 | xmin = 6.150 383 | xmax = 6.210 384 | text = "dh" 385 | intervals [66]: 386 | xmin = 6.210 387 | xmax = 6.250 388 | text = "ax" 389 | intervals [67]: 390 | xmin = 6.250 391 | xmax = 6.340 392 | text = "k" 393 | intervals [68]: 394 | xmin = 6.340 395 | xmax = 6.390 396 | text = "ax" 397 | intervals [69]: 398 | xmin = 6.390 399 | xmax = 6.460 400 | text = "m" 401 | intervals [70]: 402 | xmin = 6.460 403 | xmax = 6.520 404 | text = "ih" 405 | intervals [71]: 406 | xmin = 6.520 407 | xmax = 6.630 408 | text = "sh" 409 | intervals [72]: 410 | xmin = 6.630 411 | xmax = 6.710 412 | text = "ax" 413 | intervals [73]: 414 | xmin = 6.710 415 | xmax = 6.740 416 | text = "n" 417 | intervals [74]: 418 | xmin = 6.740 419 | xmax = 6.800 420 | text = "n" 421 | intervals [75]: 422 | xmin = 6.800 423 | xmax = 6.880 424 | text = "eh" 425 | intervals [76]: 426 | xmin = 6.880 427 | xmax = 6.920 428 | text = "v" 429 | intervals [77]: 430 | xmin = 6.920 431 | xmax = 7.000 432 | text = "er" 433 | intervals [78]: 434 | xmin = 7.000 435 | xmax = 7.030 436 | text = "dh" 437 | intervals [79]: 438 | xmin = 7.030 439 | xmax = 7.100 440 | text = "ax" 441 | intervals [80]: 442 | xmin = 7.100 443 | xmax = 7.180 444 | text = "l" 445 | intervals [81]: 446 | xmin = 7.180 447 | xmax = 7.270 448 | text = "eh" 449 | intervals [82]: 450 | xmin = 7.270 451 | xmax = 7.370 452 | text = "s" 453 | intervals [83]: 454 | xmin = 7.370 455 | xmax = 7.400 456 | text = "ax" 457 | intervals [84]: 458 | xmin = 7.400 459 | xmax = 7.510 460 | text = "z" 461 | intervals [85]: 462 | xmin = 7.510 463 | xmax = 7.610 464 | text = "k" 465 | intervals [86]: 466 | xmin = 7.610 467 | xmax = 7.730 468 | text = "aa" 469 | intervals [87]: 470 | xmin = 7.730 471 | xmax = 7.780 472 | text = "n" 473 | intervals [88]: 474 | xmin = 7.780 475 | xmax = 7.880 476 | text = "f" 477 | intervals [89]: 478 | xmin = 7.880 479 | 
xmax = 7.910 480 | text = "ax" 481 | intervals [90]: 482 | xmin = 7.910 483 | xmax = 7.950 484 | text = "d" 485 | intervals [91]: 486 | xmin = 7.950 487 | xmax = 8.050 488 | text = "ax" 489 | intervals [92]: 490 | xmin = 8.050 491 | xmax = 8.110 492 | text = "n" 493 | intervals [93]: 494 | xmin = 8.110 495 | xmax = 8.180 496 | text = "t" 497 | intervals [94]: 498 | xmin = 8.180 499 | xmax = 8.230 500 | text = "dh" 501 | intervals [95]: 502 | xmin = 8.230 503 | xmax = 8.390 504 | text = "ae" 505 | intervals [96]: 506 | xmin = 8.390 507 | xmax = 8.470 508 | text = "t" 509 | intervals [97]: 510 | xmin = 8.470 511 | xmax = 8.563673469387755 512 | text = "sp" 513 | -------------------------------------------------------------------------------- /samples/libritts/LJ050-0276.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/samples/libritts/LJ050-0276.wav -------------------------------------------------------------------------------- /samples/libritts/LJ050-0277.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0.0 5 | xmax = 8.714603174603175 6 | tiers? 
7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "words" 12 | xmin = 0.0 13 | xmax = 8.714603174603175 14 | intervals: size = 27 15 | intervals [1]: 16 | xmin = 0.000 17 | xmax = 0.170 18 | text = "with" 19 | intervals [2]: 20 | xmin = 0.170 21 | xmax = 0.340 22 | text = "the" 23 | intervals [3]: 24 | xmin = 0.340 25 | xmax = 0.740 26 | text = "active" 27 | intervals [4]: 28 | xmin = 0.740 29 | xmax = 1.590 30 | text = "cooperation" 31 | intervals [5]: 32 | xmin = 1.590 33 | xmax = 1.710 34 | text = "of" 35 | intervals [6]: 36 | xmin = 1.710 37 | xmax = 1.810 38 | text = "the" 39 | intervals [7]: 40 | xmin = 1.810 41 | xmax = 2.620 42 | text = "responsible" 43 | intervals [8]: 44 | xmin = 2.620 45 | xmax = 3.550 46 | text = "agencies" 47 | intervals [9]: 48 | xmin = 3.550 49 | xmax = 3.710 50 | text = "" 51 | intervals [10]: 52 | xmin = 3.710 53 | xmax = 4.000 54 | text = "and" 55 | intervals [11]: 56 | xmin = 4.000 57 | xmax = 4.170 58 | text = "with" 59 | intervals [12]: 60 | xmin = 4.170 61 | xmax = 4.310 62 | text = "the" 63 | intervals [13]: 64 | xmin = 4.310 65 | xmax = 5.030 66 | text = "understanding" 67 | intervals [14]: 68 | xmin = 5.030 69 | xmax = 5.130 70 | text = "of" 71 | intervals [15]: 72 | xmin = 5.130 73 | xmax = 5.240 74 | text = "the" 75 | intervals [16]: 76 | xmin = 5.240 77 | xmax = 5.590 78 | text = "people" 79 | intervals [17]: 80 | xmin = 5.590 81 | xmax = 5.700 82 | text = "of" 83 | intervals [18]: 84 | xmin = 5.700 85 | xmax = 5.780 86 | text = "the" 87 | intervals [19]: 88 | xmin = 5.780 89 | xmax = 6.240 90 | text = "united" 91 | intervals [20]: 92 | xmin = 6.240 93 | xmax = 6.720 94 | text = "states" 95 | intervals [21]: 96 | xmin = 6.720 97 | xmax = 6.820 98 | text = "in" 99 | intervals [22]: 100 | xmin = 6.820 101 | xmax = 7.000 102 | text = "their" 103 | intervals [23]: 104 | xmin = 7.000 105 | xmax = 7.450 106 | text = "demands" 107 | intervals [24]: 108 | xmin = 7.450 109 | xmax = 7.810 110 | text = 
"upon" 111 | intervals [25]: 112 | xmin = 7.810 113 | xmax = 7.980 114 | text = "their" 115 | intervals [26]: 116 | xmin = 7.980 117 | xmax = 8.620 118 | text = "president" 119 | intervals [27]: 120 | xmin = 8.620 121 | xmax = 8.714603174603175 122 | text = "" 123 | item [2]: 124 | class = "IntervalTier" 125 | name = "phones" 126 | xmin = 0.0 127 | xmax = 8.714603174603175 128 | intervals: size = 115 129 | intervals [1]: 130 | xmin = 0.000 131 | xmax = 0.060 132 | text = "w" 133 | intervals [2]: 134 | xmin = 0.060 135 | xmax = 0.140 136 | text = "ih" 137 | intervals [3]: 138 | xmin = 0.140 139 | xmax = 0.170 140 | text = "dh" 141 | intervals [4]: 142 | xmin = 0.170 143 | xmax = 0.200 144 | text = "dh" 145 | intervals [5]: 146 | xmin = 0.200 147 | xmax = 0.340 148 | text = "ax" 149 | intervals [6]: 150 | xmin = 0.340 151 | xmax = 0.540 152 | text = "ae" 153 | intervals [7]: 154 | xmin = 0.540 155 | xmax = 0.570 156 | text = "k" 157 | intervals [8]: 158 | xmin = 0.570 159 | xmax = 0.610 160 | text = "t" 161 | intervals [9]: 162 | xmin = 0.610 163 | xmax = 0.660 164 | text = "ax" 165 | intervals [10]: 166 | xmin = 0.660 167 | xmax = 0.740 168 | text = "v" 169 | intervals [11]: 170 | xmin = 0.740 171 | xmax = 0.850 172 | text = "k" 173 | intervals [12]: 174 | xmin = 0.850 175 | xmax = 0.940 176 | text = "ow" 177 | intervals [13]: 178 | xmin = 0.940 179 | xmax = 1.080 180 | text = "aa" 181 | intervals [14]: 182 | xmin = 1.080 183 | xmax = 1.140 184 | text = "p" 185 | intervals [15]: 186 | xmin = 1.140 187 | xmax = 1.270 188 | text = "er" 189 | intervals [16]: 190 | xmin = 1.270 191 | xmax = 1.380 192 | text = "ey" 193 | intervals [17]: 194 | xmin = 1.380 195 | xmax = 1.520 196 | text = "sh" 197 | intervals [18]: 198 | xmin = 1.520 199 | xmax = 1.550 200 | text = "ax" 201 | intervals [19]: 202 | xmin = 1.550 203 | xmax = 1.590 204 | text = "n" 205 | intervals [20]: 206 | xmin = 1.590 207 | xmax = 1.660 208 | text = "ah" 209 | intervals [21]: 210 | xmin = 1.660 211 | xmax 
= 1.710 212 | text = "v" 213 | intervals [22]: 214 | xmin = 1.710 215 | xmax = 1.740 216 | text = "dh" 217 | intervals [23]: 218 | xmin = 1.740 219 | xmax = 1.810 220 | text = "ax" 221 | intervals [24]: 222 | xmin = 1.810 223 | xmax = 1.860 224 | text = "r" 225 | intervals [25]: 226 | xmin = 1.860 227 | xmax = 1.920 228 | text = "iy" 229 | intervals [26]: 230 | xmin = 1.920 231 | xmax = 2.030 232 | text = "s" 233 | intervals [27]: 234 | xmin = 2.030 235 | xmax = 2.080 236 | text = "p" 237 | intervals [28]: 238 | xmin = 2.080 239 | xmax = 2.190 240 | text = "aa" 241 | intervals [29]: 242 | xmin = 2.190 243 | xmax = 2.260 244 | text = "n" 245 | intervals [30]: 246 | xmin = 2.260 247 | xmax = 2.320 248 | text = "s" 249 | intervals [31]: 250 | xmin = 2.320 251 | xmax = 2.370 252 | text = "ax" 253 | intervals [32]: 254 | xmin = 2.370 255 | xmax = 2.400 256 | text = "b" 257 | intervals [33]: 258 | xmin = 2.400 259 | xmax = 2.440 260 | text = "ax" 261 | intervals [34]: 262 | xmin = 2.440 263 | xmax = 2.620 264 | text = "l" 265 | intervals [35]: 266 | xmin = 2.620 267 | xmax = 2.770 268 | text = "ey" 269 | intervals [36]: 270 | xmin = 2.770 271 | xmax = 2.840 272 | text = "jh" 273 | intervals [37]: 274 | xmin = 2.840 275 | xmax = 2.880 276 | text = "ax" 277 | intervals [38]: 278 | xmin = 2.880 279 | xmax = 2.940 280 | text = "n" 281 | intervals [39]: 282 | xmin = 2.940 283 | xmax = 3.100 284 | text = "s" 285 | intervals [40]: 286 | xmin = 3.100 287 | xmax = 3.340 288 | text = "iy" 289 | intervals [41]: 290 | xmin = 3.340 291 | xmax = 3.550 292 | text = "z" 293 | intervals [42]: 294 | xmin = 3.550 295 | xmax = 3.710 296 | text = "sp" 297 | intervals [43]: 298 | xmin = 3.710 299 | xmax = 3.880 300 | text = "hh" 301 | intervals [44]: 302 | xmin = 3.880 303 | xmax = 3.930 304 | text = "ae" 305 | intervals [45]: 306 | xmin = 3.930 307 | xmax = 4.000 308 | text = "d" 309 | intervals [46]: 310 | xmin = 4.000 311 | xmax = 4.070 312 | text = "w" 313 | intervals [47]: 314 | xmin = 
4.070 315 | xmax = 4.140 316 | text = "ih" 317 | intervals [48]: 318 | xmin = 4.140 319 | xmax = 4.170 320 | text = "dh" 321 | intervals [49]: 322 | xmin = 4.170 323 | xmax = 4.200 324 | text = "dh" 325 | intervals [50]: 326 | xmin = 4.200 327 | xmax = 4.310 328 | text = "ax" 329 | intervals [51]: 330 | xmin = 4.310 331 | xmax = 4.390 332 | text = "ah" 333 | intervals [52]: 334 | xmin = 4.390 335 | xmax = 4.420 336 | text = "n" 337 | intervals [53]: 338 | xmin = 4.420 339 | xmax = 4.460 340 | text = "d" 341 | intervals [54]: 342 | xmin = 4.460 343 | xmax = 4.520 344 | text = "er" 345 | intervals [55]: 346 | xmin = 4.520 347 | xmax = 4.620 348 | text = "s" 349 | intervals [56]: 350 | xmin = 4.620 351 | xmax = 4.680 352 | text = "t" 353 | intervals [57]: 354 | xmin = 4.680 355 | xmax = 4.820 356 | text = "ae" 357 | intervals [58]: 358 | xmin = 4.820 359 | xmax = 4.860 360 | text = "n" 361 | intervals [59]: 362 | xmin = 4.860 363 | xmax = 4.910 364 | text = "d" 365 | intervals [60]: 366 | xmin = 4.910 367 | xmax = 4.960 368 | text = "ax" 369 | intervals [61]: 370 | xmin = 4.960 371 | xmax = 5.030 372 | text = "ng" 373 | intervals [62]: 374 | xmin = 5.030 375 | xmax = 5.080 376 | text = "ax" 377 | intervals [63]: 378 | xmin = 5.080 379 | xmax = 5.130 380 | text = "v" 381 | intervals [64]: 382 | xmin = 5.130 383 | xmax = 5.170 384 | text = "dh" 385 | intervals [65]: 386 | xmin = 5.170 387 | xmax = 5.240 388 | text = "ax" 389 | intervals [66]: 390 | xmin = 5.240 391 | xmax = 5.320 392 | text = "p" 393 | intervals [67]: 394 | xmin = 5.320 395 | xmax = 5.440 396 | text = "iy" 397 | intervals [68]: 398 | xmin = 5.440 399 | xmax = 5.490 400 | text = "p" 401 | intervals [69]: 402 | xmin = 5.490 403 | xmax = 5.520 404 | text = "ax" 405 | intervals [70]: 406 | xmin = 5.520 407 | xmax = 5.590 408 | text = "l" 409 | intervals [71]: 410 | xmin = 5.590 411 | xmax = 5.660 412 | text = "ah" 413 | intervals [72]: 414 | xmin = 5.660 415 | xmax = 5.700 416 | text = "v" 417 | intervals 
[73]: 418 | xmin = 5.700 419 | xmax = 5.740 420 | text = "dh" 421 | intervals [74]: 422 | xmin = 5.740 423 | xmax = 5.780 424 | text = "ax" 425 | intervals [75]: 426 | xmin = 5.780 427 | xmax = 5.830 428 | text = "y" 429 | intervals [76]: 430 | xmin = 5.830 431 | xmax = 5.860 432 | text = "uw" 433 | intervals [77]: 434 | xmin = 5.860 435 | xmax = 5.930 436 | text = "n" 437 | intervals [78]: 438 | xmin = 5.930 439 | xmax = 6.040 440 | text = "ay" 441 | intervals [79]: 442 | xmin = 6.040 443 | xmax = 6.110 444 | text = "t" 445 | intervals [80]: 446 | xmin = 6.110 447 | xmax = 6.160 448 | text = "ax" 449 | intervals [81]: 450 | xmin = 6.160 451 | xmax = 6.240 452 | text = "d" 453 | intervals [82]: 454 | xmin = 6.240 455 | xmax = 6.330 456 | text = "s" 457 | intervals [83]: 458 | xmin = 6.330 459 | xmax = 6.400 460 | text = "t" 461 | intervals [84]: 462 | xmin = 6.400 463 | xmax = 6.590 464 | text = "ey" 465 | intervals [85]: 466 | xmin = 6.590 467 | xmax = 6.640 468 | text = "t" 469 | intervals [86]: 470 | xmin = 6.640 471 | xmax = 6.720 472 | text = "s" 473 | intervals [87]: 474 | xmin = 6.720 475 | xmax = 6.760 476 | text = "ax" 477 | intervals [88]: 478 | xmin = 6.760 479 | xmax = 6.820 480 | text = "n" 481 | intervals [89]: 482 | xmin = 6.820 483 | xmax = 6.870 484 | text = "dh" 485 | intervals [90]: 486 | xmin = 6.870 487 | xmax = 6.930 488 | text = "eh" 489 | intervals [91]: 490 | xmin = 6.930 491 | xmax = 7.000 492 | text = "r" 493 | intervals [92]: 494 | xmin = 7.000 495 | xmax = 7.040 496 | text = "d" 497 | intervals [93]: 498 | xmin = 7.040 499 | xmax = 7.080 500 | text = "ax" 501 | intervals [94]: 502 | xmin = 7.080 503 | xmax = 7.180 504 | text = "m" 505 | intervals [95]: 506 | xmin = 7.180 507 | xmax = 7.330 508 | text = "ae" 509 | intervals [96]: 510 | xmin = 7.330 511 | xmax = 7.360 512 | text = "n" 513 | intervals [97]: 514 | xmin = 7.360 515 | xmax = 7.400 516 | text = "d" 517 | intervals [98]: 518 | xmin = 7.400 519 | xmax = 7.450 520 | text = "z" 
521 | intervals [99]: 522 | xmin = 7.450 523 | xmax = 7.540 524 | text = "ax" 525 | intervals [100]: 526 | xmin = 7.540 527 | xmax = 7.630 528 | text = "p" 529 | intervals [101]: 530 | xmin = 7.630 531 | xmax = 7.760 532 | text = "aa" 533 | intervals [102]: 534 | xmin = 7.760 535 | xmax = 7.810 536 | text = "n" 537 | intervals [103]: 538 | xmin = 7.810 539 | xmax = 7.850 540 | text = "dh" 541 | intervals [104]: 542 | xmin = 7.850 543 | xmax = 7.900 544 | text = "eh" 545 | intervals [105]: 546 | xmin = 7.900 547 | xmax = 7.980 548 | text = "r" 549 | intervals [106]: 550 | xmin = 7.980 551 | xmax = 8.060 552 | text = "p" 553 | intervals [107]: 554 | xmin = 8.060 555 | xmax = 8.120 556 | text = "r" 557 | intervals [108]: 558 | xmin = 8.120 559 | xmax = 8.180 560 | text = "eh" 561 | intervals [109]: 562 | xmin = 8.180 563 | xmax = 8.250 564 | text = "z" 565 | intervals [110]: 566 | xmin = 8.250 567 | xmax = 8.300 568 | text = "ax" 569 | intervals [111]: 570 | xmin = 8.300 571 | xmax = 8.370 572 | text = "d" 573 | intervals [112]: 574 | xmin = 8.370 575 | xmax = 8.460 576 | text = "ax" 577 | intervals [113]: 578 | xmin = 8.460 579 | xmax = 8.550 580 | text = "n" 581 | intervals [114]: 582 | xmin = 8.550 583 | xmax = 8.620 584 | text = "t" 585 | intervals [115]: 586 | xmin = 8.620 587 | xmax = 8.714603174603175 588 | text = "sp" 589 | -------------------------------------------------------------------------------- /samples/libritts/LJ050-0277.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/samples/libritts/LJ050-0277.wav -------------------------------------------------------------------------------- /samples/libritts/LJ050-0278.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0.0 5 | xmax = 8.923582766439909 6 | 
tiers? 7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "words" 12 | xmin = 0.0 13 | xmax = 8.923582766439909 14 | intervals: size = 24 15 | intervals [1]: 16 | xmin = 0.000 17 | xmax = 0.100 18 | text = "the" 19 | intervals [2]: 20 | xmin = 0.100 21 | xmax = 0.910 22 | text = "recommendations" 23 | intervals [3]: 24 | xmin = 0.910 25 | xmax = 1.010 26 | text = "we" 27 | intervals [4]: 28 | xmin = 1.010 29 | xmax = 1.180 30 | text = "have" 31 | intervals [5]: 32 | xmin = 1.180 33 | xmax = 1.420 34 | text = "here" 35 | intervals [6]: 36 | xmin = 1.420 37 | xmax = 2.200 38 | text = "suggested" 39 | intervals [7]: 40 | xmin = 2.200 41 | xmax = 2.600 42 | text = "" 43 | intervals [8]: 44 | xmin = 2.600 45 | xmax = 2.780 46 | text = "would" 47 | intervals [9]: 48 | xmin = 2.780 49 | xmax = 3.270 50 | text = "greatly" 51 | intervals [10]: 52 | xmin = 3.270 53 | xmax = 3.740 54 | text = "advance" 55 | intervals [11]: 56 | xmin = 3.740 57 | xmax = 3.830 58 | text = "the" 59 | intervals [12]: 60 | xmin = 3.830 61 | xmax = 4.440 62 | text = "security" 63 | intervals [13]: 64 | xmin = 4.440 65 | xmax = 4.550 66 | text = "of" 67 | intervals [14]: 68 | xmin = 4.550 69 | xmax = 4.670 70 | text = "the" 71 | intervals [15]: 72 | xmin = 4.670 73 | xmax = 5.330 74 | text = "office" 75 | intervals [16]: 76 | xmin = 5.330 77 | xmax = 5.590 78 | text = "" 79 | intervals [17]: 80 | xmin = 5.590 81 | xmax = 5.940 82 | text = "without" 83 | intervals [18]: 84 | xmin = 5.940 85 | xmax = 6.170 86 | text = "any" 87 | intervals [19]: 88 | xmin = 6.170 89 | xmax = 6.830 90 | text = "impairment" 91 | intervals [20]: 92 | xmin = 6.830 93 | xmax = 7.080 94 | text = "" 95 | intervals [21]: 96 | xmin = 7.080 97 | xmax = 7.230 98 | text = "of" 99 | intervals [22]: 100 | xmin = 7.230 101 | xmax = 7.430 102 | text = "our" 103 | intervals [23]: 104 | xmin = 7.430 105 | xmax = 8.130 106 | text = "fundamental" 107 | intervals [24]: 108 | xmin = 8.130 109 | xmax = 
8.923582766439909 110 | text = "liberties" 111 | item [2]: 112 | class = "IntervalTier" 113 | name = "phones" 114 | xmin = 0.0 115 | xmax = 8.923582766439909 116 | intervals: size = 108 117 | intervals [1]: 118 | xmin = 0.000 119 | xmax = 0.050 120 | text = "dh" 121 | intervals [2]: 122 | xmin = 0.050 123 | xmax = 0.100 124 | text = "ax" 125 | intervals [3]: 126 | xmin = 0.100 127 | xmax = 0.190 128 | text = "r" 129 | intervals [4]: 130 | xmin = 0.190 131 | xmax = 0.260 132 | text = "eh" 133 | intervals [5]: 134 | xmin = 0.260 135 | xmax = 0.320 136 | text = "k" 137 | intervals [6]: 138 | xmin = 0.320 139 | xmax = 0.350 140 | text = "ax" 141 | intervals [7]: 142 | xmin = 0.350 143 | xmax = 0.400 144 | text = "m" 145 | intervals [8]: 146 | xmin = 0.400 147 | xmax = 0.460 148 | text = "ax" 149 | intervals [9]: 150 | xmin = 0.460 151 | xmax = 0.490 152 | text = "n" 153 | intervals [10]: 154 | xmin = 0.490 155 | xmax = 0.530 156 | text = "d" 157 | intervals [11]: 158 | xmin = 0.530 159 | xmax = 0.660 160 | text = "ey" 161 | intervals [12]: 162 | xmin = 0.660 163 | xmax = 0.760 164 | text = "sh" 165 | intervals [13]: 166 | xmin = 0.760 167 | xmax = 0.790 168 | text = "ax" 169 | intervals [14]: 170 | xmin = 0.790 171 | xmax = 0.840 172 | text = "n" 173 | intervals [15]: 174 | xmin = 0.840 175 | xmax = 0.910 176 | text = "z" 177 | intervals [16]: 178 | xmin = 0.910 179 | xmax = 0.970 180 | text = "w" 181 | intervals [17]: 182 | xmin = 0.970 183 | xmax = 1.010 184 | text = "iy" 185 | intervals [18]: 186 | xmin = 1.010 187 | xmax = 1.100 188 | text = "hh" 189 | intervals [19]: 190 | xmin = 1.100 191 | xmax = 1.130 192 | text = "ax" 193 | intervals [20]: 194 | xmin = 1.130 195 | xmax = 1.180 196 | text = "v" 197 | intervals [21]: 198 | xmin = 1.180 199 | xmax = 1.250 200 | text = "hh" 201 | intervals [22]: 202 | xmin = 1.250 203 | xmax = 1.360 204 | text = "ih" 205 | intervals [23]: 206 | xmin = 1.360 207 | xmax = 1.420 208 | text = "r" 209 | intervals [24]: 210 | xmin = 
1.420 211 | xmax = 1.540 212 | text = "s" 213 | intervals [25]: 214 | xmin = 1.540 215 | xmax = 1.610 216 | text = "ax" 217 | intervals [26]: 218 | xmin = 1.610 219 | xmax = 1.640 220 | text = "g" 221 | intervals [27]: 222 | xmin = 1.640 223 | xmax = 1.760 224 | text = "jh" 225 | intervals [28]: 226 | xmin = 1.760 227 | xmax = 1.850 228 | text = "eh" 229 | intervals [29]: 230 | xmin = 1.850 231 | xmax = 1.950 232 | text = "s" 233 | intervals [30]: 234 | xmin = 1.950 235 | xmax = 2.020 236 | text = "t" 237 | intervals [31]: 238 | xmin = 2.020 239 | xmax = 2.110 240 | text = "ax" 241 | intervals [32]: 242 | xmin = 2.110 243 | xmax = 2.200 244 | text = "d" 245 | intervals [33]: 246 | xmin = 2.200 247 | xmax = 2.600 248 | text = "sp" 249 | intervals [34]: 250 | xmin = 2.600 251 | xmax = 2.690 252 | text = "w" 253 | intervals [35]: 254 | xmin = 2.690 255 | xmax = 2.720 256 | text = "uh" 257 | intervals [36]: 258 | xmin = 2.720 259 | xmax = 2.780 260 | text = "d" 261 | intervals [37]: 262 | xmin = 2.780 263 | xmax = 2.880 264 | text = "g" 265 | intervals [38]: 266 | xmin = 2.880 267 | xmax = 2.950 268 | text = "r" 269 | intervals [39]: 270 | xmin = 2.950 271 | xmax = 3.080 272 | text = "ey" 273 | intervals [40]: 274 | xmin = 3.080 275 | xmax = 3.130 276 | text = "t" 277 | intervals [41]: 278 | xmin = 3.130 279 | xmax = 3.190 280 | text = "l" 281 | intervals [42]: 282 | xmin = 3.190 283 | xmax = 3.270 284 | text = "iy" 285 | intervals [43]: 286 | xmin = 3.270 287 | xmax = 3.310 288 | text = "ax" 289 | intervals [44]: 290 | xmin = 3.310 291 | xmax = 3.370 292 | text = "d" 293 | intervals [45]: 294 | xmin = 3.370 295 | xmax = 3.440 296 | text = "v" 297 | intervals [46]: 298 | xmin = 3.440 299 | xmax = 3.580 300 | text = "ae" 301 | intervals [47]: 302 | xmin = 3.580 303 | xmax = 3.640 304 | text = "n" 305 | intervals [48]: 306 | xmin = 3.640 307 | xmax = 3.740 308 | text = "s" 309 | intervals [49]: 310 | xmin = 3.740 311 | xmax = 3.780 312 | text = "dh" 313 | intervals [50]: 
314 | xmin = 3.780 315 | xmax = 3.830 316 | text = "ax" 317 | intervals [51]: 318 | xmin = 3.830 319 | xmax = 3.950 320 | text = "s" 321 | intervals [52]: 322 | xmin = 3.950 323 | xmax = 4.000 324 | text = "ax" 325 | intervals [53]: 326 | xmin = 4.000 327 | xmax = 4.030 328 | text = "k" 329 | intervals [54]: 330 | xmin = 4.030 331 | xmax = 4.150 332 | text = "y" 333 | intervals [55]: 334 | xmin = 4.150 335 | xmax = 4.180 336 | text = "uh" 337 | intervals [56]: 338 | xmin = 4.180 339 | xmax = 4.260 340 | text = "r" 341 | intervals [57]: 342 | xmin = 4.260 343 | xmax = 4.300 344 | text = "ax" 345 | intervals [58]: 346 | xmin = 4.300 347 | xmax = 4.380 348 | text = "t" 349 | intervals [59]: 350 | xmin = 4.380 351 | xmax = 4.440 352 | text = "iy" 353 | intervals [60]: 354 | xmin = 4.440 355 | xmax = 4.500 356 | text = "ax" 357 | intervals [61]: 358 | xmin = 4.500 359 | xmax = 4.550 360 | text = "v" 361 | intervals [62]: 362 | xmin = 4.550 363 | xmax = 4.630 364 | text = "ih" 365 | intervals [63]: 366 | xmin = 4.630 367 | xmax = 4.670 368 | text = "n" 369 | intervals [64]: 370 | xmin = 4.670 371 | xmax = 4.820 372 | text = "ao" 373 | intervals [65]: 374 | xmin = 4.820 375 | xmax = 4.920 376 | text = "f" 377 | intervals [66]: 378 | xmin = 4.920 379 | xmax = 5.010 380 | text = "ax" 381 | intervals [67]: 382 | xmin = 5.010 383 | xmax = 5.330 384 | text = "s" 385 | intervals [68]: 386 | xmin = 5.330 387 | xmax = 5.590 388 | text = "sp" 389 | intervals [69]: 390 | xmin = 5.590 391 | xmax = 5.670 392 | text = "w" 393 | intervals [70]: 394 | xmin = 5.670 395 | xmax = 5.710 396 | text = "ih" 397 | intervals [71]: 398 | xmin = 5.710 399 | xmax = 5.780 400 | text = "th" 401 | intervals [72]: 402 | xmin = 5.780 403 | xmax = 5.880 404 | text = "aw" 405 | intervals [73]: 406 | xmin = 5.880 407 | xmax = 5.940 408 | text = "t" 409 | intervals [74]: 410 | xmin = 5.940 411 | xmax = 6.000 412 | text = "eh" 413 | intervals [75]: 414 | xmin = 6.000 415 | xmax = 6.060 416 | text = "n" 417 | 
intervals [76]: 418 | xmin = 6.060 419 | xmax = 6.170 420 | text = "iy" 421 | intervals [77]: 422 | xmin = 6.170 423 | xmax = 6.200 424 | text = "ax" 425 | intervals [78]: 426 | xmin = 6.200 427 | xmax = 6.270 428 | text = "m" 429 | intervals [79]: 430 | xmin = 6.270 431 | xmax = 6.370 432 | text = "p" 433 | intervals [80]: 434 | xmin = 6.370 435 | xmax = 6.490 436 | text = "eh" 437 | intervals [81]: 438 | xmin = 6.490 439 | xmax = 6.550 440 | text = "r" 441 | intervals [82]: 442 | xmin = 6.550 443 | xmax = 6.610 444 | text = "m" 445 | intervals [83]: 446 | xmin = 6.610 447 | xmax = 6.660 448 | text = "ax" 449 | intervals [84]: 450 | xmin = 6.660 451 | xmax = 6.730 452 | text = "n" 453 | intervals [85]: 454 | xmin = 6.730 455 | xmax = 6.830 456 | text = "t" 457 | intervals [86]: 458 | xmin = 6.830 459 | xmax = 7.080 460 | text = "sp" 461 | intervals [87]: 462 | xmin = 7.080 463 | xmax = 7.160 464 | text = "ah" 465 | intervals [88]: 466 | xmin = 7.160 467 | xmax = 7.230 468 | text = "v" 469 | intervals [89]: 470 | xmin = 7.230 471 | xmax = 7.370 472 | text = "aw" 473 | intervals [90]: 474 | xmin = 7.370 475 | xmax = 7.430 476 | text = "er" 477 | intervals [91]: 478 | xmin = 7.430 479 | xmax = 7.570 480 | text = "f" 481 | intervals [92]: 482 | xmin = 7.570 483 | xmax = 7.650 484 | text = "ah" 485 | intervals [93]: 486 | xmin = 7.650 487 | xmax = 7.690 488 | text = "n" 489 | intervals [94]: 490 | xmin = 7.690 491 | xmax = 7.720 492 | text = "d" 493 | intervals [95]: 494 | xmin = 7.720 495 | xmax = 7.760 496 | text = "ax" 497 | intervals [96]: 498 | xmin = 7.760 499 | xmax = 7.840 500 | text = "m" 501 | intervals [97]: 502 | xmin = 7.840 503 | xmax = 7.890 504 | text = "eh" 505 | intervals [98]: 506 | xmin = 7.890 507 | xmax = 7.970 508 | text = "n" 509 | intervals [99]: 510 | xmin = 7.970 511 | xmax = 8.010 512 | text = "t" 513 | intervals [100]: 514 | xmin = 8.010 515 | xmax = 8.070 516 | text = "ax" 517 | intervals [101]: 518 | xmin = 8.070 519 | xmax = 8.130 520 | 
text = "l" 521 | intervals [102]: 522 | xmin = 8.130 523 | xmax = 8.220 524 | text = "l" 525 | intervals [103]: 526 | xmin = 8.220 527 | xmax = 8.260 528 | text = "ih" 529 | intervals [104]: 530 | xmin = 8.260 531 | xmax = 8.310 532 | text = "b" 533 | intervals [105]: 534 | xmin = 8.310 535 | xmax = 8.400 536 | text = "er" 537 | intervals [106]: 538 | xmin = 8.400 539 | xmax = 8.460 540 | text = "t" 541 | intervals [107]: 542 | xmin = 8.460 543 | xmax = 8.660 544 | text = "iy" 545 | intervals [108]: 546 | xmin = 8.660 547 | xmax = 8.923582766439909 548 | text = "z" 549 | -------------------------------------------------------------------------------- /samples/libritts/LJ050-0278.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/samples/libritts/LJ050-0278.wav -------------------------------------------------------------------------------- /samples/rjs_01_0003.lab: -------------------------------------------------------------------------------- 1 | 0 1550000 pau !SENT_START 2 | 1550000 3050000 h high 3 | 3050000 4250000 ai 4 | 4250000 5050000 l labor 5 | 5050000 6500000 ei 6 | 6500000 7200000 b 7 | 7200000 8450000 @ 8 | 8450000 10450000 s small 9 | 10450000 10950000 m 10 | 10950000 12300000 oo 11 | 12300000 12750000 lw 12 | 12750000 14000000 s scale 13 | 14000000 14500000 k 14 | 14500000 16050000 ei 15 | 16050000 17450000 lw 16 | 17450000 18250000 e enterprises 17 | 18250000 18750000 n 18 | 18750000 19300000 t 19 | 19300000 19800000 @ 20 | 19800000 20600000 p 21 | 20600000 21500000 r 22 | 21500000 22350000 ai 23 | 22350000 23400000 z 24 | 23400000 24300000 i 25 | 24300000 25500000 z 26 | 25500000 25850000 pau 27 | 25850000 26200000 b by 28 | 26200000 27800000 ai 29 | 27800000 28050000 i employing 30 | 28050000 28950000 m 31 | 28950000 29800000 p 32 | 29800000 30150000 l 33 | 30150000 31550000 oi 34 | 31550000 31850000 i 35 | 
31850000 32700000 ng 36 | 32700000 33650000 l low 37 | 33650000 35200000 ou 38 | 35200000 36550000 k cost 39 | 36550000 38000000 o 40 | 38000000 38950000 s 41 | 38950000 39800000 t 42 | 39800000 40700000 m marginal 43 | 40700000 42250000 aa 44 | 42250000 43100000 jh 45 | 43100000 43350000 i 46 | 43350000 43900000 n 47 | 43900000 45500000 l! 48 | 45500000 45800000 l labor 49 | 45800000 47000000 ei 50 | 47000000 47600000 b 51 | 47600000 48700000 @ 52 | 48700000 51500000 pau 53 | 51500000 51900000 w which 54 | 51900000 52300000 i 55 | 52300000 53300000 ch 56 | 53300000 53600000 r require 57 | 53600000 53900000 i 58 | 53900000 54900000 k 59 | 54900000 55300000 w 60 | 55300000 57250000 ai 61 | 57250000 57550000 @ 62 | 57550000 58600000 l large 63 | 58600000 60700000 aa 64 | 60700000 62100000 jh 65 | 62100000 63000000 l labor 66 | 63000000 64100000 ei 67 | 64100000 64400000 b 68 | 64400000 65850000 @ 69 | 65850000 66100000 i inputs 70 | 66100000 67100000 n 71 | 67100000 67950000 p 72 | 67950000 68750000 u 73 | 68750000 70000000 t 74 | 70000000 71350000 s 75 | 71350000 73150000 pau !SENT_END 76 | -------------------------------------------------------------------------------- /samples/rjs_01_0003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/samples/rjs_01_0003.wav -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/screenshot.png -------------------------------------------------------------------------------- /test/diff_num.py: -------------------------------------------------------------------------------- 1 | # travis test for comparing prominence and boundary values across versions. 
2 | # allow for minor differences in values 3 | import sys, glob 4 | import numpy as np 5 | ref_files = sorted(glob.glob(sys.argv[1]+"/*.prom")) 6 | test_files = sorted(glob.glob(sys.argv[2]+"/*.prom")) 7 | 8 | for i in range(len(ref_files)): 9 | ref = (open(ref_files[i], "r")).readlines() 10 | test = (open(test_files[i], "r")).readlines() 11 | 12 | val_ref = [] 13 | val_test = [] 14 | # compare prominence and boundary values with some tolerance 15 | for l in ref: 16 | val_ref.append(float(l.strip().split("\t")[-1])) 17 | val_ref.append(float(l.strip().split("\t")[-2])) 18 | for l in test: 19 | val_test.append(float(l.strip().split("\t")[-1])) 20 | val_test.append(float(l.strip().split("\t")[-2])) 21 | 22 | 23 | assert np.allclose(np.array(val_ref), np.array(val_test), atol=0.3), \ 24 | ref_files[i]+" and "+test_files[i]+ " differ too much!" 25 | -------------------------------------------------------------------------------- /test/resources/libritts/7127_75947_000010_000000.prom: -------------------------------------------------------------------------------- 1 | 7127_75947_000010_000000 0.000 0.680 yes 0.745 1.052 2 | 7127_75947_000010_000000 0.750 0.890 the -0.116 0.000 3 | 7127_75947_000010_000000 0.890 1.450 character 3.169 1.255 4 | 7127_75947_000010_000000 1.450 1.670 which -0.008 0.000 5 | 7127_75947_000010_000000 1.670 1.880 your 0.017 0.366 6 | 7127_75947_000010_000000 1.880 2.250 royal 1.332 0.361 7 | 7127_75947_000010_000000 2.250 2.660 highness 0.655 0.923 8 | 7127_75947_000010_000000 2.660 3.190 assumed 2.819 1.281 9 | 7127_75947_000010_000000 3.190 3.380 is -0.057 0.000 10 | 7127_75947_000010_000000 3.380 3.520 in 0.006 0.511 11 | 7127_75947_000010_000000 3.520 3.960 perfect 2.022 0.487 12 | 7127_75947_000010_000000 3.960 4.380 harmony 0.900 0.630 13 | 7127_75947_000010_000000 4.380 4.550 with -0.100 0.218 14 | 7127_75947_000010_000000 4.550 4.710 your -0.024 0.000 15 | 7127_75947_000010_000000 4.710 5.080 own 1.285 1.000 16 | 
-------------------------------------------------------------------------------- /test/resources/libritts/LJ050-0276.prom: -------------------------------------------------------------------------------- 1 | LJ050-0276 0.000 0.180 as 0.336 0.174 2 | LJ050-0276 0.180 0.460 has 1.829 0.000 3 | LJ050-0276 0.460 0.660 been -0.023 0.684 4 | LJ050-0276 0.660 1.070 pointed 0.584 0.247 5 | LJ050-0276 1.070 1.430 out 0.942 1.440 6 | LJ050-0276 1.790 1.900 the -0.011 0.000 7 | LJ050-0276 1.900 2.330 commission 1.712 0.000 8 | LJ050-0276 2.330 2.510 has -0.057 0.856 9 | LJ050-0276 2.510 2.780 not 2.328 0.364 10 | LJ050-0276 2.780 3.350 resolved 0.250 1.209 11 | LJ050-0276 3.350 3.580 all 1.080 0.000 12 | LJ050-0276 3.580 3.690 the 0.000 0.891 13 | LJ050-0276 3.690 4.420 proposals 1.988 0.578 14 | LJ050-0276 4.420 4.650 which 0.633 0.057 15 | LJ050-0276 4.650 4.810 could 0.023 0.692 16 | LJ050-0276 4.810 4.970 be -0.077 0.000 17 | LJ050-0276 4.970 5.450 made 1.280 0.978 18 | LJ050-0276 6.150 6.250 the -0.003 0.000 19 | LJ050-0276 6.250 6.740 commission 1.177 0.121 20 | LJ050-0276 6.740 7.370 nevertheless 0.347 1.309 21 | LJ050-0276 7.370 7.510 is 0.000 0.000 22 | LJ050-0276 7.510 8.180 confident 2.469 0.663 23 | LJ050-0276 8.180 8.470 that 0.723 1.000 24 | -------------------------------------------------------------------------------- /test/resources/libritts/LJ050-0277.prom: -------------------------------------------------------------------------------- 1 | LJ050-0277 0.000 0.170 with 0.430 1.010 2 | LJ050-0277 0.170 0.340 the -0.037 0.000 3 | LJ050-0277 0.340 0.740 active 3.736 1.174 4 | LJ050-0277 0.740 1.590 cooperation 0.959 0.439 5 | LJ050-0277 1.590 1.710 of 0.000 0.000 6 | LJ050-0277 1.710 1.810 the -0.027 1.361 7 | LJ050-0277 1.810 2.620 responsible 2.287 0.704 8 | LJ050-0277 2.620 3.550 agencies 1.066 0.939 9 | LJ050-0277 3.710 4.000 and 0.266 0.000 10 | LJ050-0277 4.000 4.170 with 0.864 0.244 11 | LJ050-0277 4.170 4.310 the 0.378 0.071 12 | LJ050-0277 4.310 5.030 
understanding 1.089 0.087 13 | LJ050-0277 5.030 5.130 of 0.000 0.854 14 | LJ050-0277 5.130 5.240 the -0.080 0.000 15 | LJ050-0277 5.240 5.590 people 2.182 0.000 16 | LJ050-0277 5.590 5.700 of -0.009 1.087 17 | LJ050-0277 5.700 5.780 the -0.060 0.000 18 | LJ050-0277 5.780 6.240 united 0.351 0.292 19 | LJ050-0277 6.240 6.720 states 0.911 0.204 20 | LJ050-0277 6.720 6.820 in 0.000 0.024 21 | LJ050-0277 6.820 7.000 their 0.095 0.083 22 | LJ050-0277 7.000 7.450 demands 0.350 0.774 23 | LJ050-0277 7.450 7.810 upon 2.003 0.767 24 | LJ050-0277 7.810 7.980 their -0.103 0.000 25 | LJ050-0277 7.980 8.620 president 1.032 1.000 26 | -------------------------------------------------------------------------------- /test/resources/libritts/LJ050-0278.prom: -------------------------------------------------------------------------------- 1 | LJ050-0278 0.000 0.100 the 0.000 0.000 2 | LJ050-0278 0.100 0.910 recommendations 1.652 1.132 3 | LJ050-0278 0.910 1.010 we 0.000 0.000 4 | LJ050-0278 1.010 1.180 have -0.049 0.124 5 | LJ050-0278 1.180 1.420 here 1.098 0.460 6 | LJ050-0278 1.420 2.200 suggested 1.787 1.327 7 | LJ050-0278 2.600 2.780 would 0.205 0.297 8 | LJ050-0278 2.780 3.270 greatly 2.707 0.259 9 | LJ050-0278 3.270 3.740 advance 0.002 1.320 10 | LJ050-0278 3.740 3.830 the -0.056 0.000 11 | LJ050-0278 3.830 4.440 security 1.914 0.000 12 | LJ050-0278 4.440 4.550 of 0.000 0.665 13 | LJ050-0278 4.550 4.670 the -0.033 0.000 14 | LJ050-0278 4.670 5.330 office 0.960 1.147 15 | LJ050-0278 5.590 5.940 without 1.523 0.000 16 | LJ050-0278 5.940 6.170 any 0.148 0.613 17 | LJ050-0278 6.170 6.830 impairment 1.496 1.364 18 | LJ050-0278 7.080 7.230 of 0.000 0.000 19 | LJ050-0278 7.230 7.430 our 1.700 0.000 20 | LJ050-0278 7.430 8.130 fundamental 0.053 0.552 21 | LJ050-0278 8.130 8.924 liberties 0.713 1.000 22 | -------------------------------------------------------------------------------- /test/resources/test_spectrum/8hz_4hz_1hz.freqs.txt: 
-------------------------------------------------------------------------------- 1 | 16.00000 14.92853 13.92881 12.99604 12.12573 11.31371 10.55606 9.84916 9.18959 8.57419 8.00000 7.46426 6.96440 6.49802 6.06287 5.65685 5.27803 4.92458 4.59479 4.28709 4.00000 3.73213 3.48220 3.24901 3.03143 2.82843 2.63902 2.46229 2.29740 2.14355 2.00000 1.86607 1.74110 1.62450 1.51572 1.41421 1.31951 1.23114 1.14870 1.07177 1.00000 0.93303 0.87055 0.81225 0.75786 0.70711 0.65975 0.61557 0.57435 0.53589 0.50000 0.46652 0.43528 0.40613 0.37893 0.35355 0.32988 0.30779 0.28717 0.26794 0.25000 -------------------------------------------------------------------------------- /test/resources/test_spectrum/8hz_4hz_1hz.spec.txt: -------------------------------------------------------------------------------- 1 | 0.06162 0.07188 0.08484 0.10062 0.12017 0.14874 0.19752 0.27625 0.37755 0.46408 0.48220 0.40586 0.27301 0.16186 0.11925 0.14038 0.20643 0.30600 0.42299 0.51960 0.53918 0.45020 0.29310 0.15702 0.08705 0.05898 0.04023 0.02544 0.02024 0.02280 0.02593 0.02505 0.02128 0.02030 0.02924 0.05343 0.09916 0.17173 0.26570 0.35278 0.38407 0.32698 0.20557 0.08974 0.02797 0.01087 0.00972 0.01044 0.01066 0.01113 0.01204 0.01370 0.01593 0.01885 0.02227 0.02558 0.02821 0.03015 0.03098 0.03060 0.03062 -------------------------------------------------------------------------------- /test/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check default script 4 | python3 wavelet_prosody_toolkit/cwt_analysis_synthesis.py -v samples/01l_fact_0001.wav 01l_fact_0001.cwt 5 | diff 01l_fact_0001.cwt test/resources/01l_fact_0001.cwt 6 | ret=$? 
7 | if [ $ret != 0 ]; then 8 | exit $ret 9 | fi 10 | 11 | # Check prosody labeller 12 | python3 wavelet_prosody_toolkit/prosody_labeller.py -v -o test_libri -c wavelet_prosody_toolkit/configs/libritts.yaml samples/libritts 13 | python3 test/diff_num.py test_libri test/resources/libritts 14 | #diff -r test_libri test/resources/libritts 15 | ret=$? 16 | if [ $ret != 0 ]; then 17 | exit $ret 18 | fi 19 | 20 | # Check global spectrum extractor 21 | python3 wavelet_prosody_toolkit/cwt_global_spectrum.py -v -o test_spectrum samples/8hz_4hz_1hz.wav 22 | diff -r test_spectrum/ test/resources/test_spectrum 23 | ret=$? 24 | if [ $ret != 0 ]; then 25 | exit $ret 26 | fi 27 | -------------------------------------------------------------------------------- /tools.rst: -------------------------------------------------------------------------------- 1 | Additional tools for prosody processing with wavelets 2 | ----------------------------------------------------- 3 | 4 | Besides the graphical Wavelet Prosody Analyzer, the repository contains additional command-line tools related to prosody processing with wavelets, described below. 5 | Precise usage of the tools can be checked by running the tools with the --help flag, for example: 6 | 7 | .. code:: sh 8 | 9 | python3 cwt_analysis_synthesis.py --help 10 | 11 | | 12 | | 13 | 14 | **prosody_labeller.py** 15 | 16 | .. image:: img/prosody_labeller.png 17 | :width: 600 18 | 19 | This tool provides the same functionality as the graphical wavelet prosody analyzer. With parallel processing and no graphical overhead, it is suitable for processing large speech corpora. We also provide configuration files fine-tuned for English prominence and boundary estimation. Try: 20 | 21 | .. code:: sh 22 | 23 | python3 prosody_labeller.py samples/libritts --config configs/libritts.yaml 24 | 25 | or 26 | 27 | .. 
code:: sh 28 | 29 | python3 prosody_labeller.py samples/libritts --config configs/libritts_boundary.yaml 30 | 31 | 32 | *Talman A, Suni A, Celikkanat H, Kakouros S, Tiedemann J, Vainio M. Predicting Prosodic Prominence from Text with Pre-trained Contextualized Word Representations. Nordic Conference of Computational Linguistics. 2019 Aug 9.* 33 | 34 | *Antti Suni, Juraj Šimko, Daniel Aalto, Martti Vainio, Hierarchical representation and estimation of prosody using continuous wavelet transform, Computer Speech & Language, Volume 45, 2017, Pages 123-136, ISSN 0885-2308, https://doi.org/10.1016/j.csl.2016.11.001.* 35 | 36 | | 37 | | 38 | 39 | **cwt_analysis_synthesis.py** 40 | 41 | .. image:: img/analysis_synthesis.png 42 | :width: 600 43 | 44 | This tool demonstrates how F0 can be decomposed to temporal scales which can be associated to phonological levels, and how the original F0 contour can be reconstructed from these scales. 45 | 46 | *Suni, A. S., Aalto, D., Raitio, T., Alku, P., & Vainio, M. (2013). Wavelets for intonation modeling in HMM speech synthesis. In A. Bonafonte (Ed.), 8th ISCA Workshop on Speech Synthesis, Proceedings, Barcelona, August 31 - September 2, 2013 (pp. 285-290). Barcelona: ISCA.* 47 | 48 | | 49 | | 50 | 51 | **cwt_global_spectrum.py** 52 | 53 | .. image:: img/global_spectrum.png 54 | :width: 600 55 | 56 | This script extracts global wavelet spectrum of the speech envelope, similar to amplitude modulation spectrum. 57 | 58 | *Suni , A , Kallio , H , Benus , S & Šimko , J 2019 , Characterizing second language fluency with global wavelet spectrum . 
in S Calhoun , P Escudero , M Tabain & P Warren (eds) , Proceedings of the 19th International Congress of Phonetic Sciences, Melbourne, Australia.* 59 | 60 | 61 | -------------------------------------------------------------------------------- /wavelet_prosody_toolkit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asuni/wavelet_prosody_toolkit/564d2aad4ae2401aab2e521255e1d65dacc3756d/wavelet_prosody_toolkit/__init__.py -------------------------------------------------------------------------------- /wavelet_prosody_toolkit/configs/default.yaml: -------------------------------------------------------------------------------- 1 | f0: 2 | use_existing_f0: false # do not perform pitch tracking if .f0 file exists 3 | pitch_tracker: "inst_freq" # inst_freq (our algorithm) or REAPER (from google) 4 | min_f0: 50 5 | max_f0: 400 6 | voicing_threshold: 50 # from 0 (all voiced) to 100 (no voicing) 7 | interpolation_method: "true_envelope" # fill unvoiced sections; "linear", "pchip" or "true_envelope" 8 | 9 | energy: 10 | # subband for energy calculation (in hz) 11 | band_min: 200 12 | band_max: 5000 13 | calculation_method: "rms" # 'rms', 'hilbert' or 'true_envelope' (root mean square, hilbert envelope, or true envelope -inspired method) 14 | smooth_energy: True # smoothing reduces the effect of voicing related variation in energy contour 15 | 16 | 17 | duration: 18 | acoustic_estimation: False # estimate speech rate from signal using wavelet transform of energy 19 | delta_duration: False # use differential duration signal, useful for boundary detection 20 | duration_tiers: ["phones","words"] 21 | weights: [0.5, 0.5] # weight for selected duration tiers 22 | silence_symbols: ["#", "!pau", "", "pau", "!sil", "sil", "", " ","

", "", "." ,",","?"] 23 | linear: False # linear or logarithmic durations 24 | bump: False # more lively signal by emphasizing the differences 25 | # between adjacent unit duration 26 | 27 | feature_combination: 28 | type: "sum" # sum or product 29 | detrend: True # detrend can be used to remove typical downdrift during utterance, mainly produces more balanced looking scalograms 30 | weights: 31 | f0: 1.0 32 | energy: 1.0 33 | duration: 0.5 34 | 35 | labels: 36 | annotation_tier: "words" # adjust these to your annotation scheme 37 | 38 | wavelet: 39 | mother_wavelet: "mexican_hat" # mexican_hat, morlet or paul 40 | period: 3 # applies to morlet and paul wavelets (periods 2 - 5 are reasonable) 41 | 42 | scale_distance: 0.25 # distance between adjacent scales in octaves, (0.25 means 4 scales per octave) 43 | num_scales: 34 # number of wavelet scales 44 | magnitude: False # for purposes other than prosodic event annotation, 45 | # examinining wavelet magnitude might be useful (complex wavelets Morlet or Paul should be used) 46 | 47 | # lines of maximum amplitude, defined as octaves below and above the measured unit scale 48 | loma: 49 | prom_start: -3 # -3 meaning three octaves below unit scale 50 | prom_end: 0 51 | boundary_start: -2 52 | boundary_end: 1 # one octave higher than unit scale 53 | -------------------------------------------------------------------------------- /wavelet_prosody_toolkit/configs/libritts.yaml: -------------------------------------------------------------------------------- 1 | f0: 2 | use_existing_f0: false # do not perform pitch tracking if .f0 file exists 3 | pitch_tracker: "inst_freq" # inst_freq (our algorithm) or REAPER (from google) 4 | min_f0: 50 5 | max_f0: 400 6 | voicing_threshold: 20 # from 0 (all voiced) to 100 (no voicing) 7 | interpolation_method: "true_envelope" # fill unvoiced sections; "linear", "pchip" or "true_envelope" 8 | 9 | energy: 10 | # subband for energy calculation (in hz) 11 | band_min: 400 12 | band_max: 4000 
13 | calculation_method: "rms" # 'rms', 'hilbert' or 'true_envelope' (root mean square, hilbert envelope, or true envelope -inspired method) 14 | smooth_energy: True # smoothing reduces the effect of voicing related variation in energy contour 15 | 16 | 17 | duration: 18 | acoustic_estimation: False # estimate speech rate from signal using wavelet transform of energy 19 | delta_duration: False # use differential duration signal, useful for boundary detection 20 | bump: False # more lively signal by emphasizing the differences 21 | # between adjacent unit durations 22 | duration_tiers: ["words", "phones"] 23 | weights: [0.5, 0.5] # weight for selected duration tiers 24 | #duration_tiers: ["words"] 25 | silence_symbols: ["#", "!pau", "", "pau","sp", "!sil", "sil", "", " ","

", "", "." ,",","?"] 26 | linear: False # linear or logarithmic durations 27 | feature_combination: 28 | type: "product" # sum or product 29 | detrend: True # detrend can be used to remove typical downdrift during utterance, mainly produces more balanced looking scalograms 30 | weights: 31 | f0: 1.0 32 | energy: 0.5 33 | duration: 1.0 34 | 35 | labels: 36 | annotation_tier: "words" # adjust these to your annotation scheme 37 | 38 | wavelet: 39 | mother_wavelet: "mexican_hat" # mexican_hat, morlet or paul 40 | period: 3 # applies to morlet and paul wavelets (periods 2 - 5 are reasonable) 41 | 42 | scale_distance: 0.25 # distance between adjacent scales in octaves, (0.25 means 4 scales per octave) 43 | num_scales: 40 # number of wavelet scales 44 | magnitude: False # for purposes other than prosodic event annotation, 45 | # examinining wavelet magnitude might be useful (complex wavelets Morlet or Paul should be used) 46 | 47 | # lines of maximum amplitude, defined as octaves below and above the measured unit scale 48 | loma: 49 | prom_start: -2 # -3 meaning three octaves below unit scale 50 | prom_end: 1 51 | boundary_start: -1 52 | boundary_end: 2 # one octave higher than unit scale 53 | -------------------------------------------------------------------------------- /wavelet_prosody_toolkit/configs/libritts_boundary.yaml: -------------------------------------------------------------------------------- 1 | f0: 2 | use_existing_f0: false # do not perform pitch tracking if .f0 file exists 3 | pitch_tracker: "inst_freq" # inst_freq (our algorithm) or REAPER (from google) 4 | min_f0: 50 5 | max_f0: 400 6 | voicing_threshold: 20 # from 0 (all voiced) to 100 (no voicing) 7 | interpolation_method: "true_envelope" # fill unvoiced sections; "linear", "pchip" or "true_envelope" 8 | 9 | energy: 10 | # subband for energy calculation (in hz) 11 | band_min: 400 12 | band_max: 4000 13 | calculation_method: "true_envelope" # 'rms', 'hilbert' or 'true_envelope' (root mean square, 
hilbert envelope, or true envelope -inspired method) 14 | smooth_energy: True # smoothing reduces the effect of voicing related variation in energy contour 15 | 16 | 17 | duration: 18 | acoustic_estimation: False # estimate speech rate from signal using wavelet transform of energy 19 | delta_duration: True # use differential duration signal, useful for boundary detection 20 | bump: False # more lively duration signal by emphasizing the differences 21 | # between adjacent unit durations 22 | duration_tiers: ["words"] #, "phones"] 23 | 24 | weights: [0.5, 0.5] 25 | silence_symbols: ["#", "!pau", "", "pau","sp", "!sil", "sil", "", " ","

", "", "." ,",","?"] 26 | linear: True # linear log logarithmi durations 27 | 28 | feature_combination: 29 | type: "sum" # sum or product 30 | detrend: False # detrend can be used to remove typical downdrift during utterance, mainly produces more balanced looking scalograms 31 | weights: 32 | f0: 1.0 33 | energy: 1.0 34 | duration: 0.5 35 | 36 | labels: 37 | #annotation_tier: "word" # BURNC 38 | anotation_tier: "words" 39 | wavelet: 40 | mother_wavelet: "mexican_hat" # mexican_hat, morlet or paul 41 | period: 3 # applies to morlet and paul wavelets (periods 2 - 5 are reasonable) 42 | 43 | scale_distance: 0.25 # distance between adjacent scales in octaves, (0.25 means 4 scales per octave) 44 | num_scales: 40 # number of wavelet scales 45 | magnitude: False # for purposes other than prosodic event annotation, 46 | # examinining wavelet magnitude might be useful (complex wavelets Morlet or Paul should be used) 47 | 48 | # lines of maximum amplitude, defined as octaves below and above the measured unit scale 49 | loma: 50 | prom_start: -2 # -3 meaning three octaves below unit scale 51 | prom_end: 1 52 | boundary_start: -1 53 | boundary_end: 2.5 # one octave higher than unit scale 54 | -------------------------------------------------------------------------------- /wavelet_prosody_toolkit/configs/synthesis.yaml: -------------------------------------------------------------------------------- 1 | wavelet: 2 | mother_wavelet: "mexican_hat" # mexican_hat, morlet or paul 3 | scale_distance: 1 # distance between adjacent scales in octaves, (0.25 means 4 scales per octave) 4 | num_scales: 12 # number of wavelet scales 5 | combined_scales: [[0, 2], [2, 4], [4, 6], [6, 8], [8, 12]] 6 | 7 | -------------------------------------------------------------------------------- /wavelet_prosody_toolkit/cwt_analysis_synthesis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | AUTHOR 5 | 
- Antti Suni 6 | - Sébastien Le Maguer 7 | 8 | DESCRIPTION 9 | 10 | usage: cwt_analysis_synthesis.py [-h] [-v] [-M MODE] [-m MEAN_F0] [-o OUTPUT] 11 | [-P] 12 | input_file 13 | 14 | Tool for CWT analysis/synthesis of the F0 15 | 16 | positional arguments: 17 | input_file Input signal or F0 file 18 | 19 | optional arguments: 20 | -h, --help show this help message and exit 21 | -v, --verbosity increase output verbosity 22 | -M MODE, --mode MODE script mode: 0=analysis, 1=synthesis, 2=analysis/synthesis 23 | -m MEAN_F0, --mean_f0 MEAN_F0 24 | Mean f0 needed for synthesis (unsed for analysis modes) 25 | -o OUTPUT, --output OUTPUT 26 | output directory for analysis or filename for synthesis. 27 | (Default: input_file directory [Analysis] or .f0 [Synthesis]) 28 | -P, --plot Plot the results 29 | 30 | 31 | LICENSE 32 | See https://github.com/asuni/wavelet_prosody_toolkit/blob/master/LICENSE.txt 33 | """ 34 | 35 | import sys 36 | import os 37 | import traceback 38 | import argparse 39 | import time 40 | import logging 41 | 42 | import yaml 43 | 44 | # Collections 45 | from collections import defaultdict 46 | 47 | import warnings 48 | 49 | # Wavelet import 50 | from wavelet_prosody_toolkit.prosody_tools import misc 51 | from wavelet_prosody_toolkit.prosody_tools import cwt_utils 52 | from wavelet_prosody_toolkit.prosody_tools import f0_processing 53 | 54 | import numpy as np 55 | 56 | # List of logging levels used to setup everything using verbose option 57 | LEVEL = [logging.WARNING, logging.INFO, logging.DEBUG] 58 | 59 | # FIXME: be more specific! 
60 | warnings.simplefilter("ignore", np.ComplexWarning) # Plotting can't deal with complex, but we don't care 61 | 62 | 63 | ############################################################################### 64 | # Functions 65 | ############################################################################### 66 | def apply_configuration(current_configuration, updating_part): 67 | """Utils to update the current configuration using the updating part 68 | 69 | Parameters 70 | ---------- 71 | current_configuration: dict 72 | The current state of the configuration 73 | 74 | updating_part: dict 75 | The information to add to the current configuration 76 | 77 | Returns 78 | ------- 79 | dict 80 | the updated configuration 81 | """ 82 | if not isinstance(current_configuration, dict): 83 | return updating_part 84 | 85 | if current_configuration is None: 86 | return updating_part 87 | 88 | if updating_part is None: 89 | return current_configuration 90 | 91 | for k in updating_part: 92 | if k not in current_configuration: 93 | current_configuration[k] = updating_part[k] 94 | else: 95 | current_configuration[k] = apply_configuration(current_configuration[k], updating_part[k]) 96 | 97 | return current_configuration 98 | 99 | 100 | def load_f0(input_file, binary_mode=False, configuration=None): 101 | """Load the f0 from a text file or extract it from a wav file 102 | 103 | Parameters 104 | ---------- 105 | input_file: string 106 | The input file name. 
107 | 108 | Returns 109 | ------- 110 | 1D arraylike 111 | the raw f0 values 112 | """ 113 | if input_file.lower().endswith(".csv"): 114 | if binary_mode: 115 | raise Exception("cannot have a csv file in binary mode") 116 | else: 117 | raw_f0 = np.loadtxt(input_file) 118 | if input_file.lower().endswith(".f0"): 119 | if binary_mode: 120 | raw_f0 = np.fromfile(input_file, dtype=np.float32) 121 | else: 122 | raw_f0 = np.loadtxt(input_file) 123 | elif input_file.lower().endswith(".lf0"): 124 | if binary_mode: 125 | raw_f0 = np.fromfile(input_file, dtype=np.float32) 126 | else: 127 | raw_f0 = np.loadtxt(input_file) 128 | raw_f0 = np.exp(raw_f0) 129 | elif input_file.lower().endswith(".wav"): 130 | logging.info("Extracting the F0 from the signal") 131 | (fs, wav_form) = misc.read_wav(input_file) 132 | raw_f0 = f0_processing.extract_f0(wav_form, fs, 133 | configuration["f0"]["min_f0"], 134 | configuration["f0"]["max_f0"]) 135 | 136 | 137 | return raw_f0 138 | 139 | 140 | ############################################################################### 141 | # Main function 142 | ############################################################################### 143 | def run(): 144 | """Main entry function 145 | 146 | This function contains the code needed to achieve the analysis and/or the synthesis 147 | """ 148 | global args 149 | 150 | warnings.simplefilter("ignore", FutureWarning) # Plotting can't deal with complex, but we don't care 151 | 152 | # Loading default configuration 153 | configuration = defaultdict() 154 | with open(os.path.dirname(os.path.realpath(__file__)) + "/configs/default.yaml", 'r') as f: 155 | configuration = apply_configuration(configuration, defaultdict(lambda: False, yaml.safe_load(f))) 156 | logging.debug("default configuration") 157 | logging.debug(configuration) 158 | 159 | # Loading dedicated analysis.synthesis configuration 160 | with open(os.path.dirname(os.path.realpath(__file__)) + "/configs/synthesis.yaml", 'r') as f: 161 | configuration = 
apply_configuration(configuration, defaultdict(lambda: False, yaml.safe_load(f))) 162 | logging.debug("configuration filled with synthesis part") 163 | logging.debug(configuration) 164 | 165 | # Loading user configuration 166 | if args.configuration_file: 167 | try: 168 | with open(args.configuration_file, 'r') as f: 169 | configuration = apply_configuration(configuration, defaultdict(lambda: False, yaml.safe_load(f))) 170 | logging.debug("configuration filled with user part") 171 | logging.debug(configuration) 172 | except IOError as ex: 173 | logging.error("configuration file " + args.configuration_file + " could not be loaded:") 174 | logging.error(str(ex)) 175 | sys.exit(1) 176 | 177 | # Analysis Mode 178 | if args.mode == 0: 179 | raw_f0 = load_f0(args.input_file, args.binary_mode, configuration) 180 | 181 | logging.info("Processing f0") 182 | f0 = f0_processing.process(raw_f0) 183 | # FIXME: reintegrated 184 | if args.plot: 185 | # Plotting 186 | import matplotlib.pyplot as plt 187 | import matplotlib.colors as colors 188 | 189 | plt.title("F0 preprocessing and interpolation") 190 | plt.plot(f0, color="red", alpha=0.5, linewidth=3) 191 | plt.plot(raw_f0, color="gray", alpha=0.5) 192 | plt.show() 193 | 194 | # # FIXME: read this?
195 | # logging.info("writing interpolated lf0\t" + output_file + ".interp") 196 | # np.savetxt(output_file + ".interp", f0.astype('float'), 197 | # fmt="%f", delimiter="\n") 198 | 199 | # Perform continuous wavelet transform of mean-substracted f0 with 12 scales, one octave apart 200 | logging.info("Starting analysis with (num_scale=%d, scale_distance=%f, mother_name=%s)" % 201 | (configuration["wavelet"]["num_scales"], configuration["wavelet"]["scale_distance"], configuration["wavelet"]["mother_wavelet"])) 202 | full_scales, widths, _ = cwt_utils.cwt_analysis(f0 - np.mean(f0), 203 | mother_name=configuration["wavelet"]["mother_wavelet"], 204 | period=configuration["wavelet"]["period"], 205 | num_scales=configuration["wavelet"]["num_scales"], 206 | scale_distance=configuration["wavelet"]["scale_distance"], 207 | apply_coi=False) 208 | full_scales = np.real(full_scales) 209 | # SSW parameterization, adjacent scales combined (with extra scales to handle long utterances) 210 | scales = cwt_utils.combine_scales(np.real(full_scales), configuration["wavelet"]["combined_scales"]) 211 | for i in range(0, len(scales)): 212 | logging.debug("Mean scale[%d]: %s" % (i, str(np.mean(scales[i])))) 213 | 214 | # Saving matrix 215 | logging.info("writing wavelet matrix in \"%s\"" % args.output_file) 216 | if args.binary_mode: 217 | with open(args.output_file, "wb") as f_out: 218 | scales.T.astype(np.float32).tofile(f_out) 219 | else: 220 | np.savetxt(args.output_file, scales.T.astype('float'), fmt="%f", delimiter=",") 221 | 222 | # Synthesis mode 223 | if args.mode == 1: 224 | if args.binary_mode: 225 | scales = np.fromfile(args.input_file, dtype=np.float32) 226 | scales = scales.reshape(-1, len(configuration["wavelet"]["combined_scales"])).T 227 | else: 228 | scales = np.loadtxt(args.input_file, delimiter=",").T # FIXME: hardcoded 229 | 230 | rec = cwt_utils.cwt_synthesis(scales, args.mean_f0) 231 | 232 | logging.info("Save reconstructed f0 in %s" % args.output_file) 233 | if 
args.binary_mode: 234 | with open(args.output_file, "wb") as f_out: 235 | rec.astype(np.float32).tofile(f_out) 236 | else: 237 | np.savetxt(args.output_file, rec, fmt="%f") 238 | 239 | # Debugging /plotting part 240 | if args.plot: 241 | nb_sub = 2 242 | if args.mode == 0: 243 | nb_sub = 3 244 | 245 | ax = plt.subplot(nb_sub, 1, 1) 246 | # pylab.title("CWT decomposition to % scales and reconstructed signal" % len(configuration["wavelet"]["combined_scales"])) 247 | 248 | if args.mode == 0: 249 | plt.plot(f0, linewidth=1, color="red") 250 | rec = cwt_utils.cwt_synthesis(scales, np.mean(f0)) 251 | 252 | plt.plot(rec, color="blue", alpha=0.3) 253 | 254 | plt.subplot(nb_sub, 1, 2, sharex=ax) 255 | for i in range(0, len(scales)): 256 | plt.plot(scales[i] + max(rec)*1.5 + i*75, 257 | color="blue", alpha=0.5) 258 | #plt.plot(scales[len(scales)-i-1] + max(rec)*1.5 + i*75, 259 | 260 | 261 | 262 | if args.mode == 0: 263 | plt.subplot(nb_sub, 1, 3, sharex=ax) 264 | plt.contourf(np.real(full_scales), 100, 265 | norm=colors.SymLogNorm(linthresh=0.2, linscale=0.05, 266 | vmin=np.min(full_scales), vmax=np.max(full_scales)),cmap="jet") 267 | plt.show() 268 | 269 | 270 | ############################################################################### 271 | # Envelopping 272 | ############################################################################### 273 | def main(): 274 | """Entry point for CWT analysis/synthesis tool 275 | 276 | This function is a wrapper to deal with arguments and logging. 
277 | """ 278 | global args 279 | 280 | try: 281 | parser = argparse.ArgumentParser(description="Tool for CWT analysis/synthesis of the F0") 282 | 283 | # Add options 284 | parser.add_argument("-B", "--binary-mode", action="store_true", 285 | help="Activate binary mode, else files are assumed to be a csv for the f0/wavelet part") 286 | parser.add_argument("-c", "--configuration-file", default=None, help="configuration file") 287 | parser.add_argument("-M", "--mode", type=int, default=0, 288 | help="script mode: 0=analysis, 1=synthesis") 289 | parser.add_argument("-m", "--mean_f0", type=float, default=100, 290 | help="Mean f0 needed for synthesis (unsed for analysis modes)") 291 | parser.add_argument("-P", "--plot", action="store_true", 292 | help="Plot the results") 293 | parser.add_argument("-v", "--verbosity", action="count", default=0, 294 | help="increase output verbosity") 295 | 296 | # Add arguments 297 | parser.add_argument("input_file", help="Input signal or F0 file") 298 | parser.add_argument("output_file", 299 | help="output directory for analysis or filename for synthesis. 
" + 300 | "(Default: input_file directory [Analysis] or .f0 [Synthesis])") 301 | 302 | # Parsing arguments 303 | args = parser.parse_args() 304 | 305 | # Verbose level => logging level 306 | log_level = args.verbosity 307 | if (args.verbosity >= len(LEVEL)): 308 | log_level = len(LEVEL) - 1 309 | logging.basicConfig(level=LEVEL[log_level]) 310 | logging.warning("verbosity level is too high, I'm gonna assume you're taking the highest (%d)" % log_level) 311 | else: 312 | logging.basicConfig(level=LEVEL[log_level]) 313 | 314 | # Debug time 315 | start_time = time.time() 316 | logging.info("start time = " + time.asctime()) 317 | 318 | # Running main function <=> run application 319 | run() 320 | 321 | # Debug time 322 | logging.info("end time = " + time.asctime()) 323 | logging.info('TOTAL TIME IN MINUTES: %02.2f' % 324 | ((time.time() - start_time) / 60.0)) 325 | 326 | # Exit program 327 | sys.exit(0) 328 | except KeyboardInterrupt as e: # Ctrl-C 329 | raise e 330 | except SystemExit as e: # sys.exit() 331 | pass 332 | except Exception as e: 333 | logging.error('ERROR, UNEXPECTED EXCEPTION') 334 | logging.error(str(e)) 335 | traceback.print_exc(file=sys.stderr) 336 | sys.exit(-1) 337 | 338 | 339 | if __name__ == '__main__': 340 | main() 341 | 342 | # cwt_analysis_synthesis.py ends here 343 | -------------------------------------------------------------------------------- /wavelet_prosody_toolkit/cwt_global_spectrum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | AUTHOR 5 | - Antti Suni 6 | - Sébastien Le Maguer 7 | 8 | DESCRIPTION 9 | 10 | usage: cwt_global_spectrum.py [-h] [-v] [-o OUTPUT] 11 | [-P] 12 | input_file 13 | 14 | 15 | Tool for extracting global wavelet spectrum of speech envelope 16 | introduced for second language fluency estimation in the following paper: 17 | 18 | @inproceedings{suni2019characterizing, 19 | title={Characterizing second language 
fluency with global wavelet spectrum}, 20 | author={Suni, Antti and Kallio, Heini and Benu{\v{s}}, {\v{S}}tefan and {\v{S}}imko, Juraj}, 21 | booktitle={International Congress of Phonetic Sciences}, 22 | pages={1947--1951}, 23 | year={2019}, 24 | organization={Australasian Speech Science and Technology Association Inc.} 25 | } 26 | 27 | positional arguments: 28 | input_file Input signal or F0 file 29 | 30 | optional arguments: 31 | -h, --help show this help message and exit 32 | -v, --verbosity increase output verbosity 33 | -o OUTPUT, --output OUTPUT 34 | output directory for analysis or filename for synthesis. 35 | (Default: input_file directory [Analysis] or .f0 [Synthesis]) 36 | -P, --plot Plot the results 37 | 38 | 39 | You should be able to see peak around 4Hz, corresponding to syllable rate. 40 | For longer speech files, lower frequency peaks related to phrasing should appear. 41 | Synthetic test file with 8Hz, 4Hz and 1Hz components is included in sample directory. 42 | 43 | 44 | LICENSE 45 | See https://github.com/asuni/wavelet_prosody_toolkit/blob/master/LICENSE.txt 46 | 47 | """ 48 | 49 | # System/default 50 | import sys 51 | import os 52 | 53 | # Arguments 54 | import argparse 55 | 56 | # Messaging/logging 57 | import traceback 58 | import time 59 | import logging 60 | 61 | # Math/plot 62 | import numpy as np 63 | import matplotlib.ticker 64 | import matplotlib.pyplot as plt 65 | 66 | # Libraries 67 | from wavelet_prosody_toolkit.prosody_tools import cwt_utils as cwt_utils 68 | from wavelet_prosody_toolkit.prosody_tools import misc as misc 69 | from wavelet_prosody_toolkit.prosody_tools import energy_processing as energy_processing 70 | 71 | 72 | ############################################################################### 73 | # global constants 74 | ############################################################################### 75 | LEVEL = [logging.WARNING, logging.INFO, logging.DEBUG] 76 | 77 | 78 | 
def calc_global_spectrum(wav_file, period=5, n_scales=60, plot=False):
    """Compute the global wavelet spectrum of a speech wave file.

    The amplitude envelope of the signal is extracted, analyzed with a
    continuous Morlet wavelet transform, and the power is averaged over
    time, yielding one power value per scale (the "global spectrum").

    Parameters
    ----------
    wav_file: str
        Path to the input wave file.
    period: int, optional
        Period of the Morlet mother wavelet; a larger period gives a
        sharper spectrum [default: 5].
    n_scales: int, optional
        Number of wavelet scales [default: 60].
    plot: bool, optional
        If True, display the scalogram and the global spectrum [default: False].

    Returns
    -------
    power_spec: ndarray
        Mean wavelet power per scale.
    freq: ndarray
        The frequency (Hz) associated with each scale.
    """

    # Extract signal envelope, scale and normalize
    (fs, waveform) = misc.read_wav(wav_file)
    waveform = misc.resample(waveform, fs, 16000)
    energy = energy_processing.extract_energy(waveform, min_freq=30, method="hilbert")
    energy[energy<0] = 0
    # cube root compresses the envelope dynamics; +0.1 avoids cbrt(0) flatness
    energy = np.cbrt(energy+0.1)
    params = misc.normalize_std(energy)


    # perform continuous wavelet transform on envelope with morlet wavelet

    # increase period to get sharper spectrum
    matrix, scales, freq = cwt_utils.cwt_analysis(params, first_freq = 16, num_scales = n_scales, scale_distance = 0.1,period=period, mother_name="Morlet",apply_coi=True)


    # power, arbitrary scaling to prevent underflow
    p_matrix = (abs(matrix)**2).astype('float32')*1000.0
    # nanmean: values outside the cone of influence were zeroed/ignored
    power_spec = np.nanmean(p_matrix,axis=1)

    if plot:
        # left panel: scalogram; right (narrow) panel: global spectrum
        f, wave_pics = plt.subplots(1, 2, gridspec_kw = {'width_ratios':[5, 1]}, sharey=True)
        f.subplots_adjust(hspace=10)
        f.subplots_adjust(wspace=0)
        wave_pics[0].set_ylim(0, n_scales)
        wave_pics[0].set_xlabel("Time(m:s)")
        wave_pics[0].set_ylabel("Frequency(Hz)")
        wave_pics[1].set_xlabel("power")
        wave_pics[1].tick_params(labelright=True)

        fname = os.path.basename(wav_file)
        title = "CWT Morlet(p="+str(period)+") global spectrum, "+ fname
        wave_pics[0].contourf(p_matrix, 100)
        wave_pics[0].set_title(title, loc="center")
        # overlay the (scaled) envelope on the scalogram for orientation
        wave_pics[0].plot(params*3, color="white",alpha=0.5)

        # only label scales whose frequency is "round enough" to be readable
        freq_labels = [round(x,3)
                       if (np.isclose(x, round(x)) or
                           (x < 2 and np.isclose(x*100., round(x*100))) or
                           (x < 0.5 and np.isclose(x*10000., round(x*10000))))
                       else ""
                       for x in list(freq)]

        wave_pics[0].set_yticks(np.linspace(0, len(freq_labels)-1, len(freq_labels)))
        wave_pics[0].set_yticklabels(freq_labels)
        # x tick value is a frame index at 200 frames/s (analysis frame rate);
        # convert it to a minutes:seconds label
        formatter = matplotlib.ticker.FuncFormatter(lambda ms, x: time.strftime('%M:%S', time.gmtime(ms // 200)))
        wave_pics[0].xaxis.set_major_formatter(formatter)
        wave_pics[1].grid(axis="y")
        wave_pics[1].plot(power_spec,np.linspace(0,len(power_spec), len(power_spec)),"-")
        plt.show()


    return (power_spec, freq)
if __name__ == '__main__':
    # Script entry point: parse arguments, configure logging, time the run.
    try:
        parser = argparse.ArgumentParser(description="")

        # Add options
        parser.add_argument("-l", "--log_file", default=None,
                            help="Logger file")
        parser.add_argument("-o", "--output_dir", default=None, type=str,
                            help="The output directory (if not defined, use the same directory than the wave file)")
        parser.add_argument("-P", "--plot", default=False, action="store_true",
                            help="Plot the results")
        parser.add_argument("-v", "--verbosity", action="count", default=0,
                            help="increase output verbosity")

        # Add arguments
        parser.add_argument("wav_file", help="The input wave file")

        # Parsing arguments
        args = parser.parse_args()

        # create logger and formatter
        logger = logging.getLogger()
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

        # Verbose level => logging level
        log_level = args.verbosity
        if (args.verbosity >= len(LEVEL)):
            log_level = len(LEVEL) - 1
            # FIX: previously `logger.setLevel(log_level)` passed the raw
            # verbosity count (0-2) as a numeric logging level, which is
            # below DEBUG (10) and therefore let *every* record through;
            # the intended level is the one from the LEVEL table.
            logger.setLevel(LEVEL[log_level])
            logging.warning("verbosity level is too high, I'm gonna assume you're taking the highest (%d)" % log_level)
        else:
            logger.setLevel(LEVEL[log_level])

        # create console handler
        ch = logging.StreamHandler()
        ch.setFormatter(formatter)
        logger.addHandler(ch)

        # create file handler
        if args.log_file is not None:
            fh = logging.FileHandler(args.log_file)
            logger.addHandler(fh)

        # Debug time
        start_time = time.time()
        logger.info("start time = " + time.asctime())

        # Running main function <=> run application
        main()

        # Debug time
        logging.info("end time = " + time.asctime())
        logging.info('TOTAL TIME IN MINUTES: %02.2f' %
                     ((time.time() - start_time) / 60.0))

        # Exit program
        sys.exit(0)
    except KeyboardInterrupt as e:  # Ctrl-C
        raise e
    except SystemExit:  # sys.exit()
        pass
    except Exception as e:
        logging.error('ERROR, UNEXPECTED EXCEPTION')
        logging.error(str(e))
        traceback.print_exc(file=sys.stderr)
        sys.exit(-1)


else:
    print("usage: cwt_global_spectrum.py ")
-------------------------------------------------------------------------------- /wavelet_prosody_toolkit/prosody_labeller.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | AUTHOR 5 | 6 | Sébastien Le Maguer 7 | 8 | DESCRIPTION 9 | 10 | LICENSE 11 | This script is in the public domain, free from copyrights or restrictions. 12 | Created: 27 January 2020 13 | """ 14 | 15 | # System/default 16 | import sys 17 | import os 18 | import glob 19 | 20 | # Arguments 21 | import argparse 22 | 23 | # Messaging/logging 24 | import traceback 25 | import time 26 | import logging 27 | import copy 28 | 29 | # Configuration 30 | import yaml 31 | from collections import defaultdict 32 | 33 | # Math and plotting 34 | import numpy as np 35 | import scipy.ndimage 36 | import matplotlib.pyplot as plt 37 | 38 | # Parallel job managment 39 | from joblib import Parallel, delayed 40 | 41 | # acoustic features 42 | from wavelet_prosody_toolkit.prosody_tools import energy_processing 43 | from wavelet_prosody_toolkit.prosody_tools import f0_processing 44 | from wavelet_prosody_toolkit.prosody_tools import duration_processing 45 | 46 | # helpers 47 | from wavelet_prosody_toolkit.prosody_tools import misc 48 | from wavelet_prosody_toolkit.prosody_tools import smooth_and_interp 49 | 50 | # wavelet transform 51 | from wavelet_prosody_toolkit.prosody_tools import cwt_utils, loma, lab 52 | 53 | ############################################################################### 54 | # global constants 55 | ############################################################################### 56 | LEVEL = [logging.WARNING, logging.INFO, logging.DEBUG] 57 | 58 | ############################################################################### 59 | # Functions 60 | ############################################################################### 61 | def get_logger(verbosity, log_file): 62 | 63 | # create logger and 
def apply_configuration(current_configuration, updating_part):
    """Recursively merge ``updating_part`` on top of ``current_configuration``.

    Nested dictionaries are merged key by key; any non-dict value in the
    current configuration is overwritten by the corresponding value of the
    updating part. Dict inputs are updated in place, and the merged
    configuration is also returned.

    Parameters
    ----------
    current_configuration: dict
        The current state of the configuration

    updating_part: dict
        The information to add to the current configuration

    Returns
    -------
    dict
        the updated configuration
    """
    # A non-dict current value (including None) is simply replaced.
    if not isinstance(current_configuration, dict):
        return updating_part

    if current_configuration is None:
        return updating_part

    if updating_part is None:
        return current_configuration

    for key, value in updating_part.items():
        if key in current_configuration:
            current_configuration[key] = apply_configuration(current_configuration[key], value)
        else:
            current_configuration[key] = value

    return current_configuration
def analysis(input_file, cfg, logger, annotation_dir=None, output_dir=None, plot=False):
    """Analyze prosodic prominences and boundaries of one wave file.

    Extracts energy, f0 and speech-rate signals, combines them into a single
    prosodic signal, applies a continuous wavelet transform and follows lines
    of maximum amplitude (LOMA) to assign a prominence and a boundary strength
    to every unit of the configured annotation tier. Results are written to
    "<basename>.prom" in the output directory.

    Parameters
    ----------
    input_file: str
        Path to the wave file to analyze.
    cfg: dict
        Configuration dictionary (see configs/default.yaml for keys).
    logger: logging.Logger
        Logger used for reporting.
    annotation_dir: str, optional
        Directory holding the .TextGrid/.lab annotation; defaults to the
        directory of `input_file`.
    output_dir: str, optional
        Output directory; defaults to the directory of `input_file`.
    plot: int, optional
        0: no plotting; > 0: show the figure; < 0: save it as a .png.

    Raises
    ------
    Exception
        If no annotation file is found for `input_file`.
    """

    # Load the wave file
    print("Analyzing %s starting..." % input_file)
    orig_sr, sig = misc.read_wav(input_file)

    # extract energy
    energy = energy_processing.extract_energy(sig, orig_sr,
                                              cfg["energy"]["band_min"],
                                              cfg["energy"]["band_max"],
                                              cfg["energy"]["calculation_method"])
    # cube root compresses the envelope dynamics
    energy = np.cbrt(energy+1)
    if cfg["energy"]["smooth_energy"]:
        energy = smooth_and_interp.peak_smooth(energy, 30, 3)  # FIXME: 30? 3?
        energy = smooth_and_interp.smooth(energy, 10)

    # extract f0
    raw_pitch = f0_processing.extract_f0(sig, orig_sr,
                                         f0_min=cfg["f0"]["min_f0"],
                                         f0_max=cfg["f0"]["max_f0"],
                                         voicing=cfg["f0"]["voicing_threshold"],
                                         #harmonics=cfg["f0"]["harmonics"],
                                         configuration=cfg["f0"]["pitch_tracker"])
    # interpolate, stylize
    pitch = f0_processing.process(raw_pitch)

    # extract speech rate (zeros until estimated below)
    rate = np.zeros(len(pitch))


    # Get annotations (if available)
    tiers = []
    if annotation_dir is None:
        annotation_dir = os.path.dirname(input_file)
    basename = os.path.splitext(os.path.basename(input_file))[0]
    # prefer a Praat TextGrid, fall back to an HTK label file
    grid = os.path.join(annotation_dir, "%s.TextGrid" % basename)
    if os.path.exists(grid):
        tiers = lab.read_textgrid(grid)
    else:
        grid = os.path.join(annotation_dir, "%s.lab" % basename)
        if not os.path.exists(grid):
            raise Exception("There is no annotations associated with %s" % input_file)
        tiers = lab.read_htk_label(grid)

    # Extract duration
    # NOTE(review): nesting below reconstructed from a flattened source dump;
    # dur_tiers is only defined when annotations exist — verify upstream.
    if len(tiers) > 0:
        dur_tiers = []
        for level in cfg["duration"]["duration_tiers"]:
            assert(level.lower() in tiers), level+" not defined in tiers: check that duration_tiers in config match the actual textgrid tiers"
            try:
                dur_tiers.append(tiers[level.lower()])
            except:
                print("\nerror: "+"\""+level+"\"" +" not in labels, modify duration_tiers in config\n\n")
                raise

        if not cfg["duration"]["acoustic_estimation"]:
            # label-based duration contour
            rate = duration_processing.get_duration_signal(dur_tiers,
                                                           weights=cfg["duration"]["weights"],
                                                           linear=cfg["duration"]["linear"],
                                                           sil_symbols=cfg["duration"]["silence_symbols"],
                                                           bump = cfg["duration"]["bump"])

        else:
            # signal-based (acoustic) speech-rate estimate
            rate = duration_processing.get_rate(energy)
            rate = smooth_and_interp.smooth(rate, 30)

        if cfg["duration"]["delta_duration"]:
            rate = np.diff(rate)

    # Combine signals: truncate everything to the shortest stream first
    min_length = np.min([len(pitch), len(energy), len(rate)])
    pitch = pitch[:min_length]
    energy = energy[:min_length]
    rate = rate[:min_length]

    if cfg["feature_combination"]["type"] == "product":
        # geometric (product) combination of min-max normalized features
        pitch = misc.normalize_minmax(pitch) ** cfg["feature_combination"]["weights"]["f0"]
        energy = misc.normalize_minmax(energy) ** cfg["feature_combination"]["weights"]["energy"]
        rate = misc.normalize_minmax(rate) ** cfg["feature_combination"]["weights"]["duration"]
        params = pitch * energy * rate

    else:
        # weighted sum of z-scored features
        params = misc.normalize_std(pitch) * cfg["feature_combination"]["weights"]["f0"] + \
                 misc.normalize_std(energy) * cfg["feature_combination"]["weights"]["energy"] + \
                 misc.normalize_std(rate) * cfg["feature_combination"]["weights"]["duration"]

    if cfg["feature_combination"]["detrend"]:
        params = smooth_and_interp.remove_bias(params, 800)

    params = misc.normalize_std(params)


    # CWT analysis
    (cwt, scales, freqs) = cwt_utils.cwt_analysis(params,
                                                  mother_name=cfg["wavelet"]["mother_wavelet"],
                                                  period=cfg["wavelet"]["period"],
                                                  num_scales=cfg["wavelet"]["num_scales"],
                                                  scale_distance=cfg["wavelet"]["scale_distance"],
                                                  apply_coi=False)
    cwt = np.real(cwt)
    scales *= 200  # FIXME: why 200?


    # Compute lines of maximum amplitude
    assert(cfg["labels"]["annotation_tier"].lower() in tiers), \
        cfg["labels"]["annotation_tier"]+" not defined in tiers: check that annotation_tier in config is found in the textgrid tiers"
    labels = tiers[cfg["labels"]["annotation_tier"].lower()]

    # get scale corresponding to avg unit length of selected tier
    n_scales = cfg["wavelet"]["num_scales"]
    scale_dist = cfg["wavelet"]["scale_distance"]
    scales = (1./freqs*200)*0.5  # FIXME: hardcoded vales
    unit_scale = misc.get_best_scale2(scales, labels)

    # Define the scale information (FIXME: description)
    pos_loma_start_scale = unit_scale + int(cfg["loma"]["prom_start"]/scale_dist)  # three octaves down from average unit length
    pos_loma_end_scale = unit_scale + int(cfg["loma"]["prom_end"]/scale_dist)
    neg_loma_start_scale = unit_scale + int(cfg["loma"]["boundary_start"]/scale_dist)  # two octaves down
    neg_loma_end_scale = unit_scale + int(cfg["loma"]["boundary_end"]/scale_dist)  # one octave up

    # positive LOMA -> prominences, negative LOMA -> boundaries
    pos_loma = loma.get_loma(cwt, scales, pos_loma_start_scale, pos_loma_end_scale)
    neg_loma = loma.get_loma(-cwt, scales, neg_loma_start_scale, neg_loma_end_scale)

    max_loma = loma.get_prominences(pos_loma, labels)
    prominences = np.array(max_loma)
    boundaries = np.array(loma.get_boundaries(max_loma, neg_loma, labels))


    # output results
    if output_dir is None:
        output_dir = os.path.dirname(input_file)
    os.makedirs(output_dir, exist_ok=True)

    basename = os.path.splitext(os.path.basename(input_file))[0]
    output_filename = os.path.join(output_dir, "%s.prom" % basename)
    print("Saving %s..." % (output_filename))
    loma.save_analyses(output_filename,
                       labels,
                       prominences,
                       boundaries)

    # Plotting
    if plot != 0:
        # six stacked panels: pitch, energy, rate, combined, scalogram, labels
        fig, ax = plt.subplots(6, 1, sharex=True,
                               figsize=(len(labels) / 10 * 8, 8),
                               gridspec_kw = {'height_ratios':[1, 1, 1, 2, 4, 1.5]})
        plt.subplots_adjust(hspace=0)

        # Plot individual signals
        ax[0].plot(pitch, linewidth=1)
        ax[0].set_ylabel("Pitch", rotation="horizontal", ha="right", va="center")

        ax[1].plot(energy, linewidth=1)
        ax[1].set_ylabel("Energy", rotation="horizontal", ha="right", va="center")

        ax[2].plot(rate, linewidth=1)
        ax[2].set_ylabel("Speech rate", rotation="horizontal", ha="right", va="center")

        # Plot combined signal
        ax[3].plot(params, linewidth=1)
        ax[3].set_ylabel("Combined \n signal", rotation="horizontal", ha="right", va="center")
        plt.xlim(0, len(params))

        # Wavelet and loma (log-compress positives, clip negatives for display)
        cwt[cwt>0] = np.log(cwt[cwt>0]+1.)
        cwt[cwt<-0.1] = -0.1
        ax[4].contourf(cwt,100, cmap="inferno")
        loma.plot_loma(pos_loma, ax[4], color="black")
        loma.plot_loma(neg_loma, ax[4], color="white")
        ax[4].set_ylabel("Wavelet & \n LOMA", rotation="horizontal", ha="right", va="center")

        # Add labels (text size scaled by relative prominence)
        prom_text = prominences[:, 1]/(np.max(prominences[:, 1]))*2.5 + 0.5
        lab.plot_labels(labels, ypos=0.3, size=6, prominences=prom_text, fig=ax[5], boundary=False, background=False)
        ax[5].set_ylabel("Labels", rotation="horizontal", ha="right", va="center")
        for i in range(0, len(labels)):
            for a in [0, 1, 2, 3, 4, 5]:
                ax[a].axvline(x=labels[i][0], color='black',
                              linestyle="-", linewidth=0.2, alpha=0.5)

                # end line thickness encodes boundary strength
                ax[a].axvline(x=labels[i][1], color='black',
                              linestyle="-", linewidth=0.2+boundaries[i][-1] * 2,
                              alpha=0.5)

        plt.xlim(0, cwt.shape[1])

        # Align ylabels and remove axis
        fig.align_ylabels(ax)
        for i in range(len(ax)-1):
            ax[i].tick_params(
                axis='x',          # changes apply to the x-axis
                which='both',      # both major and minor ticks are affected
                bottom=False,      # ticks along the bottom edge are off
                top=False,         # ticks along the top edge are off
                labelbottom=False) # labels along the bottom edge are off
            ax[i].tick_params(
                axis='y',          # changes apply to the x-axis
                which='both',      # both major and minor ticks are affected
                left=False,        # ticks along the bottom edge are off
                right=False,       # ticks along the top edge are off
                labelleft=False)   # labels along the bottom edge are off

        ax[len(ax)-1].tick_params(
            axis='y',          # changes apply to the x-axis
            which='both',      # both major and minor ticks are affected
            left=False,        # ticks along the bottom edge are off
            right=False,       # ticks along the top edge are off
            labelleft=False)   # labels along the bottom edge are off

        # Plot
        if plot < 0:
            output_filename = os.path.join(output_dir, "%s.png" % basename)
            logger.info("Save plot %s" % output_filename)
            fig.savefig(output_filename, bbox_inches='tight', dpi=400)
        elif plot > 0:
            plt.show()
if __name__ == '__main__':
    # Script entry point: parse arguments, configure logging, run `main`
    # and report timing; all failures funnel through the except clauses.
    try:
        parser = argparse.ArgumentParser(description="Command line application to analyze prosody using wavelets.")

        # Add options
        parser.add_argument("-a", "--annotation_directory", default=None, type=str,
                            help="Annotation directory. If not specified, the tool will by default try to load annotations from the directory containing the wav files")
        parser.add_argument("-j", "--nb_jobs", default=4, type=int,
                            help="Define the number of jobs to run in parallel")
        parser.add_argument("-c", "--config", default=None, type=str,
                            help="configuration file")
        parser.add_argument("-l", "--log_file", default=None, type=str,
                            help="Logger file")
        parser.add_argument("-o", "--output_directory", default=None, type=str,
                            help="The output directory. If not specified, the tool will output the result in a .prom file in the same directory than the wave files")
        parser.add_argument("-p", "--plot", default=False, action="store_true",
                            help="Plot the result (the number of jobs is de facto set to 1 if activated)")
        parser.add_argument("-v", "--verbosity", action="count", default=1,
                            help="increase output verbosity")

        # Add arguments
        parser.add_argument("input", help="directory with wave files or wave file to analyze (a label file with the same basename should be available)")



        # Parsing arguments
        args = parser.parse_args()
        # interactive plotting is incompatible with parallel workers
        if args.plot:
            args.nb_jobs = 1
        # Get the logger
        logger = get_logger(args.verbosity, args.log_file)

        # Debug time
        start_time = time.time()
        logger.info("start time = " + time.asctime())

        # Running main function <=> run application
        main()

        # Debug time
        logger.info("end time = " + time.asctime())
        logger.info('TOTAL TIME IN MINUTES: %02.2f' %
                    ((time.time() - start_time) / 60.0))

        # Exit program
        sys.exit(0)
    except KeyboardInterrupt as e:  # Ctrl-C
        raise e
    except SystemExit:  # sys.exit()
        pass
    except Exception as e:
        # root logger used here on purpose: `logger` may not exist yet
        logging.error('ERROR, UNEXPECTED EXCEPTION')
        logging.error(str(e))
        traceback.print_exc(file=sys.stderr)
        sys.exit(-1)
11 | 12 | LICENSE 13 | See https://github.com/asuni/wavelet_prosody_toolkit/blob/master/LICENSE.txt 14 | """ 15 | -------------------------------------------------------------------------------- /wavelet_prosody_toolkit/prosody_tools/cwt_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | AUTHOR 6 | - Antti Suni 7 | - Sébastien Le Maguer 8 | 9 | DESCRIPTION 10 | Module which provides continuous wavelet transform (cwt) analysis/synthesis routines 11 | 12 | LICENSE 13 | See https://github.com/asuni/wavelet_prosody_toolkit/blob/master/LICENSE.txt 14 | """ 15 | 16 | from numpy import array,concatenate, sqrt, pad, mean, std, real, nan, zeros, nanmean, nanstd, pi, around, log2 17 | 18 | import pycwt as cwt 19 | 20 | ########################################################################################### 21 | # Private routines 22 | ########################################################################################### 23 | def _unpad(matrix, num): 24 | """Private function to unpad axis 1 of a matrix 25 | 26 | Parameters 27 | ---------- 28 | matrix: ndarray 29 | a NDarray 30 | num: int 31 | the unpadding size 32 | 33 | Returns 34 | ------- 35 | ndarray 36 | the unpadded matrix 37 | """ 38 | unpadded = matrix[:,num:len(matrix[0])-num] 39 | return unpadded 40 | 41 | 42 | def _padded_cwt(params, dt, dj, s0, J, mother, padding_len): 43 | """Private function to compute a wavelet transform on padded data 44 | 45 | Parameters 46 | ---------- 47 | params: arraylike 48 | The prosodic parameters. 49 | dt: ? 50 | ? 51 | dj: ? 52 | ? 53 | s0: ? 54 | ? 55 | J: ? 56 | ? 57 | mother: ? 58 | The mother wavelet. 59 | padding_len: int 60 | The padding length 61 | 62 | Returns 63 | ------- 64 | wavelet_matrix: ndarray 65 | The wavelet data resulting from the analysis 66 | scales: arraylike 67 | The scale indices corresponding to the wavelet data 68 | freqs: ? 69 | ? 
70 | coi: array 71 | The cone of influence values 72 | fft: ? 73 | ? 74 | fftfreqs: ? 75 | ? 76 | """ 77 | #padded = concatenate([params,params,params]) 78 | padded = pad(params, padding_len, mode='edge') #edge 79 | wavelet_matrix, scales, freqs, coi, fft, fftfreqs = cwt.cwt(padded, dt, dj, s0, J, mother) 80 | wavelet_matrix = _unpad(wavelet_matrix, padding_len) 81 | #wavelet_matrix = _unpad(wavelet_matrix, len(params)) 82 | 83 | return (wavelet_matrix, scales, freqs, coi, fft, fftfreqs) 84 | 85 | 86 | 87 | 88 | def _zero_outside_coi(wavelet_matrix,freqs, rate = 200): 89 | """Private function to set each elements outside of the Cone Of Influence (coi) to 0. 90 | 91 | Parameters 92 | ---------- 93 | wavelet_matrix: type 94 | description 95 | freqs: type 96 | description 97 | 98 | """ 99 | for i in range(0,wavelet_matrix.shape[0]): 100 | coi =int(1./freqs[i]*rate) 101 | wavelet_matrix[i,0:coi] = 0. 102 | wavelet_matrix[i,-coi:] = 0. 103 | return wavelet_matrix 104 | 105 | def _scale_for_reconstruction(wavelet_matrix,scales, dj, dt,mother="mexican_hat",period=3): 106 | """ ? 107 | 108 | Parameters 109 | ---------- 110 | wavelet_matrix: ndarray 111 | The wavelet data resulting from the analysis 112 | scales: arraylike 113 | The scale indices corresponding to the wavelet data 114 | dj: ? 115 | ? 116 | dt: ? 117 | ? 118 | mother: ? 119 | ? 120 | period: ? 121 | ? 122 | 123 | """ 124 | scaled = array(wavelet_matrix) 125 | 126 | # mexican Hat 127 | c = dj / (3.541 * 0.867) 128 | 129 | if mother=="morlet": 130 | cc = 1.83 131 | 132 | #periods 5 and 6 are correct, 3,4 approximate 133 | if period == 3: 134 | cc = 1.74 135 | if period == 4: 136 | cc = 1.1 137 | elif period==5: 138 | cc=0.9484 139 | elif period==6: 140 | cc == 0.7784 141 | 142 | c = dj / (cc * pi**(-0.25)) 143 | 144 | for i in range(0, len(scales)): 145 | scaled[i]*= c*sqrt(dt)/sqrt(scales[i]) 146 | # substracting the mean should not be necessary? 
def cwt_analysis(params, mother_name="mexican_hat", num_scales=12, first_scale=None, first_freq=None, scale_distance=1.0, apply_coi=True, period=5, frame_rate=200):
    """Achieve the continuous wavelet analysis of given parameters

    Parameters
    ----------
    params: arraylike
        The parameters to analyze.
    mother_name: string, optional
        The name of the mother wavelet [default: mexican_hat].
    num_scales: int, optional
        The number of scales [default: 12].
    first_scale: int, optional
        The width of the shortest scale
    first_freq: int, optional
        The highest frequency in Hz
    scale_distance: float, optional
        The distance between scales [default: 1.0].
    apply_coi: boolean, optional
        Apply the Cone Of Influence (coi)
    period: int, optional
        The period of the mother wavelet [default: 5].
    frame_rate: int, optional
        The signal frame rate [default: 200].

    Returns
    -------
    wavelet_matrix: ndarray
        The wavelet data resulting from the analysis
    scales: arraylike
        The scale indices corresponding to the wavelet data
    freqs: arraylike
        The frequency (Hz) associated with each scale
    """
    # setup wavelet transform
    dt = 1. / float(frame_rate)  # frame length in seconds

    if not first_scale:
        first_scale = dt  # first scale, here frame length

    # an explicit highest frequency overrides first_scale
    if first_freq:
        first_scale = _freq2scale(first_freq, mother_name, period)

    dj = scale_distance  # distance between scales in octaves
    J = num_scales  # number of scales

    mother = cwt.MexicanHat()

    if str.lower(mother_name) == "morlet":
        mother = cwt.Morlet(period)
    elif str.lower(mother_name) == "paul":
        mother = cwt.Paul(period)

    # pad with 400 edge frames on both sides to reduce boundary artifacts
    wavelet_matrix, scales, freqs, coi, fft, fftfreqs = _padded_cwt(params, dt, dj, first_scale, J, mother, 400)

    # rescale so that summing the scales reconstructs the input signal
    wavelet_matrix = _scale_for_reconstruction(wavelet_matrix, scales, dj, dt, mother=mother_name, period=period)

    if apply_coi:
        wavelet_matrix = _zero_outside_coi(wavelet_matrix, freqs, frame_rate)

    # FIX: removed leftover debug code (`import numpy as np` followed by
    # `np.set_printoptions(precision=3, suppress=True)`) which globally
    # mutated numpy's print options as a hidden side effect of analysis.
    return (wavelet_matrix, scales, freqs)
"""Synthesizing a signal given a wavelet dataset 265 | 266 | Parameters 267 | ---------- 268 | wavelet_matrix: ndarray 269 | The wavelet data matrix. 270 | mean: float 271 | The mean to translate the signal. 272 | 273 | Returns 274 | ------- 275 | arraylike 276 | The generated signal 277 | 278 | """ 279 | return sum(wavelet_matrix[:])+mean 280 | -------------------------------------------------------------------------------- /wavelet_prosody_toolkit/prosody_tools/duration_processing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | AUTHOR 6 | - Antti Suni 7 | - Sébastien Le Maguer 8 | 9 | DESCRIPTION 10 | Module which provides the duration routines to be able to apply a wavelet analysis 11 | 12 | LICENSE 13 | See https://github.com/asuni/wavelet_prosody_toolkit/blob/master/LICENSE.txt 14 | """ 15 | 16 | from . import smooth_and_interp, misc 17 | import numpy as np 18 | 19 | SIL_SYMBOLS = ["#","!pau", "sp", "", "pau", "!sil", "sil", "", " ","

", "", ".", ",","?"] 20 | 21 | 22 | def _get_dur_stats(labels, linear=False, sil_symbols=[]): 23 | durations = [] 24 | for i in range(len(labels)): 25 | (st,en, unit) = labels[i] 26 | if unit.lower() not in sil_symbols: 27 | dur = en-st 28 | if not linear: 29 | 30 | dur = np.log(dur+1.) 31 | durations.append(dur) 32 | durations = np.array(durations) 33 | return (np.min(durations), np.max(durations), np.mean(durations)) 34 | 35 | 36 | def get_rate(params,p=2,hp=10,lp=150, fig=None): 37 | """ 38 | estimation of speech rate as a center of gravity of wavelet spectrum 39 | similar to method described in "Boundary Detection using Continuous Wavelet Analysis" (2016) 40 | """ 41 | from . import cwt_utils 42 | 43 | params = smooth_and_interp.smooth(params, hp) 44 | params -= smooth_and_interp.smooth(params, lp) 45 | 46 | wavelet_matrix, scales, freqs = cwt_utils.cwt_analysis(params, mother_name="Morlet", \ 47 | num_scales=80, scale_distance=0.1,\ 48 | apply_coi=True,period=2) 49 | wavelet_matrix = abs(wavelet_matrix) 50 | 51 | rate = np.zeros(len(params)) 52 | 53 | for i in range(0,wavelet_matrix.shape[1]): 54 | frame_en = np.sum(wavelet_matrix[:,i]) 55 | # center of gravity 56 | rate[i] = np.nonzero(wavelet_matrix[:,i].cumsum() >=frame_en*0.5)[0].min() 57 | # maximum energy scale 58 | #rate[i]= np.argmax(wavelet_matrix[:,i]) #.astype('float')) 59 | 60 | if fig: 61 | fig.contourf((wavelet_matrix), 50) 62 | rate = smooth_and_interp.smooth(rate, 30) 63 | if fig: 64 | fig.plot(rate,color="black") 65 | 66 | return rate 67 | 68 | 69 | def duration(labels, rate=200,linear=False,bump=False, sil_symbols=SIL_SYMBOLS): 70 | """ 71 | construct duration signal from labels 72 | """ 73 | 74 | dur = np.zeros(len(labels)) 75 | params = np.zeros(int(labels[-1][1]*rate)) 76 | prev_end = 0 77 | (min_dur, max_dur, mean_dur) = _get_dur_stats(labels,linear, sil_symbols) 78 | 79 | for i in range(0,len(labels)): 80 | 81 | (st,en, unit) = labels[i] 82 | st*=rate 83 | en*=rate 84 | dur[i] = en-st 85 
def duration(labels, rate=200, linear=False, bump=False, sil_symbols=SIL_SYMBOLS):
    """
    construct duration signal from labels

    Each labelled unit contributes its (optionally log-compressed) duration
    as a point at the unit's temporal midpoint; the points are then
    interpolated into a continuous contour sampled at `rate` frames/s.

    Parameters
    ----------
    labels: list of (start, end, unit) tuples
        Segmentation, with times in seconds.
    rate: int, optional
        Output frame rate in frames per second [default: 200].
    linear: bool, optional
        If False, durations are compressed as log(dur + 1) [default: False].
    bump: bool, optional
        Emphasize differences between adjacent unit durations [default: False].
    sil_symbols: list of str, optional
        Unit names treated as silence (assigned the minimum duration).

    Returns
    -------
    arraylike
        The continuous duration contour.
    """

    dur = np.zeros(len(labels))
    # contour length: end time of the last label, in frames
    params = np.zeros(int(labels[-1][1]*rate))
    prev_end = 0
    (min_dur, max_dur, mean_dur) = _get_dur_stats(labels, linear, sil_symbols)

    for i in range(0, len(labels)):

        (st, en, unit) = labels[i]
        # convert seconds to frames
        st *= rate
        en *= rate
        dur[i] = en-st
        if not linear:
            dur[i] = np.log(dur[i]+1.)

        # silences are pinned to the minimum observed duration
        if unit.lower() in sil_symbols:
            dur[i] = min_dur

        # skip very short units, likely labelling errors
        if (en <= st+0.01):
            continue

        # unit duration -> height of the duration contour in the middle of the unit
        params[int(st+(en-st)/2.0)] = dur[i]

        # "bump" -> emphasize difference between adjacent unit durations
        if i > 0 and bump:
            params[int(st)] = (dur[i]+dur[i-1])/2. - (abs(dur[i]-dur[i-1]))

        # handle gaps in labels similarly to silences
        if st > prev_end and i > 1:
            params[int(prev_end+(st-prev_end)/2.0)] = min_dur
        prev_end = en

    # set endpoints to mean in order to avoid large "valleys"
    params[0] = np.mean(dur)
    params[-1] = np.mean(dur)

    # make continuous duration contour and smooth a bit
    params = smooth_and_interp.interpolate_zeros(params, 'pchip')
    params = smooth_and_interp.smooth(params, 20)

    return params
def get_duration_signal(tiers=None, weights=None, sil_symbols=SIL_SYMBOLS,
                        rate=1, linear=True, bump=False):
    """
    Construct duration contour from labels. If many tiers are selected,
    construct contours for each tier and return a weighted sum of those.

    Parameters
    ----------
    tiers: list, optional
        List of annotation tiers (each a list of (start, end, unit) tuples).
    weights: list, optional
        One weight per tier; uniform weights are used when the list does
        not match the number of tiers.
    sil_symbols: list of str, optional
        Unit names treated as silence.
    rate: int, optional
        Frame rate passed to `duration` [default: 1].
    linear: bool, optional
        Use linear (not log-compressed) durations [default: True].
    bump: bool, optional
        Emphasize adjacent duration differences [default: False].

    Returns
    -------
    arraylike
        The weighted sum of the per-tier duration contours.

    Raises
    ------
    IndexError
        If `tiers` is empty (no contour can be built).
    """
    # FIX: mutable default arguments ([]) replaced with None sentinels;
    # also removed the unused local `lengths`.
    if tiers is None:
        tiers = []
    if weights is None:
        weights = []

    durations = [misc.normalize_std(duration(t, rate=rate, sil_symbols=sil_symbols,
                                             linear=linear, bump=bump))
                 for t in tiers]

    durations = misc.match_length(durations)
    sum_durations = np.zeros(len(durations[0]))

    # fall back to uniform weighting when weights don't match the tiers
    if len(weights) != len(tiers):
        weights = np.ones(len(tiers))
    for contour, weight in zip(durations, weights):
        sum_durations += contour * weight

    return sum_durations
def extract_energy(orig_waveform, fs=16000, min_freq=200, max_freq=3000, method='rms', target_rate=200):
    """Extract an energy envelope from a waveform or a wav file.

    Parameters
    ----------
    orig_waveform: arraylike or string
        The waveform samples, or the name of a wav file to read.
    fs: int
        Sample rate of the waveform (overridden when a file is read).
    min_freq: int
        Low edge of the band used for energy estimation (Hz).
    max_freq: int
        High edge of the band used for energy estimation (Hz).
    method: string
        One of 'hilbert', 'true_envelope' or 'rms'.
    target_rate: int
        Frame rate of the returned energy contour.

    Returns
    -------
    arraylike
        The energy contour resampled to target_rate.
    """
    # accept both wav-files and waveform arrays
    if isinstance(orig_waveform, str):
        # BUGFIX: the waveform read from file was previously discarded and
        # the filename string itself was fed to the scaling below, raising
        # a TypeError; rebind orig_waveform to the samples instead.
        # (The py2 'basestring' compatibility hack was dropped: the file
        # declares python3.)
        (fs, orig_waveform) = misc.read_wav(orig_waveform)

    # NOTE: reconvert to int16 range (1/32768) to keep the consistency
    waveform = orig_waveform / 3.0517578125e-5

    import scipy.signal
    from . import filter

    # keep only the frequency band relevant for the energy measure
    lp_waveform = filter.butter_bandpass_filter(waveform, min_freq, max_freq, fs, order=5)

    # hilbert is sometimes prohibitively slow, should pad to next power of two
    if method == 'hilbert':
        energy = abs(scipy.signal.hilbert(lp_waveform))

    elif method == "true_envelope":
        # window should be about one pitch period, ~ 5 ms
        win = 0.005 * fs
        energy = smooth_and_interp.peak_smooth(abs(lp_waveform), 200, win)

    elif method == "rms":
        # per-sample magnitude; smoothing happens later in process()
        energy = np.sqrt(lp_waveform**2)

    logger.debug("fs = %d, target_rate = %d, fs/target_rate = %f" % (fs, target_rate, fs/target_rate))
    energy = misc.resample(energy, fs, target_rate)
    logger.debug("len(energy) = %d, len(energy)/target_rate = %f" % (len(energy), len(energy)/target_rate))
    return energy
import smooth_and_interp 28 | from . import pitch_tracker 29 | 30 | # Logging 31 | import logging 32 | logger = logging.getLogger(__name__) 33 | 34 | # Pyreaper 35 | try: 36 | import pyreaper 37 | USE_REAPER = True 38 | logger.info("Pyreaper is available") 39 | except ImportError: 40 | USE_REAPER = False 41 | logger.debug("Pyreaper is not available so falling back into the default pitch tracker") 42 | 43 | 44 | ############################################################################### 45 | 46 | 47 | def rolling_window(a, window): 48 | shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) 49 | strides = a.strides + (a.strides[-1],) 50 | return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) 51 | 52 | 53 | def _cut_boundary_vals(params, num_vals): 54 | cutted = np.array(params) 55 | for i in range(num_vals, len(params)-num_vals): 56 | if (params[i] <= 0) and (params[i+1] > 0): 57 | for j in range(i, i+num_vals): 58 | cutted[j] = 0.0 59 | 60 | if (params[i] > 0) and (params[i+1] <= 0): 61 | for j in range(i-num_vals, i+1): 62 | cutted[j] = 0.0 63 | 64 | return cutted 65 | 66 | 67 | def _remove_outliers(lf0, trace=False): 68 | 69 | if np.nanmean(lf0[lf0 > 0]) > 10: 70 | raise("logF0 expected") 71 | 72 | fixed = np.array(lf0) 73 | 74 | # remove f0 values from voicing boundaries, if they make a large difference for 75 | # interpolation 76 | boundary_cut = smooth_and_interp.interpolate_zeros(_cut_boundary_vals(fixed, 3), 'linear') 77 | interp = smooth_and_interp.interpolate_zeros(fixed, 'linear') 78 | fixed[abs(interp-boundary_cut) > 0.1] = 0 79 | interp = smooth_and_interp.interpolate_zeros(fixed, 'linear') 80 | 81 | # iterative outlier removal 82 | # 1. compare current contour estimate to a smoothed contour and remove deviates larger than threshold 83 | # 2. smooth current estimate with shorter window, thighten threshold 84 | # 3. goto 1. 
85 | 86 | # In practice, first handles large scale octave jump type errors, 87 | # finally small scale 'errors' like consonant perturbation effects and 88 | # other irregularities in voicing boundaries 89 | # 90 | # if this appears to remove too many correct values, increase thresholds 91 | num_iter = 30 92 | max_win_len = 100 93 | min_win_len = 10 # 20 94 | max_threshold = 3. # threshold with broad window 95 | 96 | min_threshold = 0.5 # threshold with shorted window 97 | 98 | if trace: 99 | pylab.rcParams['figure.figsize'] = 20, 5 100 | pylab.figure() 101 | pylab.title("outlier removal") 102 | 103 | _std = np.std(interp) 104 | # do not tie fixing to liveliness of the original 105 | _std = 0.3 106 | 107 | win_len = np.exp(np.linspace(np.log(max_win_len), np.log(min_win_len), 108 | num_iter+1)) 109 | outlier_threshold = np.linspace(_std*max_threshold, _std*min_threshold, 110 | num_iter+1) 111 | for i in range(0, num_iter): 112 | smooth_contour = smooth_and_interp.smooth(interp, win_len[i]) 113 | low_limit = smooth_contour - outlier_threshold[i] 114 | hi_limit = smooth_contour + outlier_threshold[i]*1.5 # bit more careful upwards, not to cut emphases 115 | 116 | # # octave jump down fix, more harm than good? 
117 | # fixed[interpsmooth_contour+0.45]=interp[interp>smooth_contour+0.45]-0.5 119 | fixed[interp > hi_limit] = 0 120 | fixed[interp < low_limit] = 0 121 | 122 | if trace: 123 | pylab.clf() 124 | pylab.title("outlier removal %d" % i) 125 | # pylab.ylim(3.5,7) 126 | pylab.plot((low_limit), 'black', linestyle='--') 127 | pylab.plot((hi_limit), 'black', linestyle='--') 128 | pylab.plot((smooth_contour), 'black', linestyle='--') 129 | pylab.plot((interp), linewidth=3) 130 | pylab.plot(lf0) 131 | pylab.show() 132 | 133 | interp = smooth_and_interp.interpolate_zeros(fixed, 'linear') 134 | 135 | # if trace: 136 | # raw_input("press any key to continue") 137 | 138 | return fixed 139 | 140 | 141 | def _interpolate(f0, method="true_envelope"): 142 | 143 | if method == "linear": 144 | return smooth_and_interp.interpolate_zeros(f0, 'linear') 145 | elif method == "pchip": 146 | return smooth_and_interp.interpolate_zeros(f0, 'pchip') 147 | 148 | elif method == 'true_envelope': 149 | interp = smooth_and_interp.interpolate_zeros(f0) 150 | 151 | _std = np.std(interp) 152 | _min = np.min(interp) 153 | low_limit = smooth_and_interp.smooth(interp, 200)-1.5*_std 154 | low_limit[low_limit < _min] = _min 155 | hi_limit = smooth_and_interp.smooth(interp, 100)+2.0*_std 156 | voicing = np.array(f0) 157 | constrained = np.array(f0) 158 | constrained = np.maximum(f0, low_limit) 159 | constrained = np.minimum(constrained, hi_limit) 160 | 161 | interp = smooth_and_interp.peak_smooth(constrained, 100, 20, 162 | voicing=voicing) 163 | # smooth voiced parts a bit too 164 | interp = smooth_and_interp.peak_smooth(interp, 3, 2) # ,voicing=raw) 165 | return interp 166 | else: 167 | raise("no such interpolation method: %s", method) 168 | 169 | 170 | def extract_f0(waveform, fs=16000, f0_min=30, f0_max=550, harmonics=10., voicing=50., configuration="pitch_tracker"): 171 | """Extract F0 from a waveform 172 | 173 | """ 174 | # first determine f0 without limits, then use mean and std of the first estimate 
def read_f0(filename):
    """Load an F0 track stored next to `filename` (extension .f0/.F0).

    NOTE: this is temporary: assumes 5ms frame shift, and the file format
    to be either one f0 value per line or praat matrix format.

    Parameters
    ----------
    filename: string
        Name of the related file (typically the wav); its extension is
        replaced by .f0/.F0 to locate the track.

    Returns
    -------
    arraylike or None
        The F0 values, or None if no readable F0 file was found.
    """
    import os.path
    for ext in [".f0", ".F0"]:
        f0_f = os.path.splitext(filename)[0]+ext

        if os.path.exists(f0_f):
            # BUGFIX: the filename must be passed as a lazy %-style
            # argument; the previous call passed it as an unused extra arg
            logger.info("reading F0 file %s", f0_f)
            try:
                # one f0 value per line
                return np.loadtxt(f0_f)
            except Exception:
                # praat matrix: skip the 4 header rows
                try:
                    return np.loadtxt(f0_f, skiprows=4)
                except Exception:
                    logger.error("unknown format for F0 value in file \"%s\"" % filename)

    return None
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter.

    For more details see scipy.signal.butter documentation.

    Parameters
    ----------
    lowcut: int
        Lower cutoff frequency (Hz).
    highcut: type
        Upper cutoff frequency (Hz); clamped below the Nyquist frequency.
    fs: int
        Signal sample rate.
    order: int
        Order of the butter filter.

    Returns
    -------
    b: arraylike
        Numerator polynomial of the IIR filter.
    a: arraylike
        Denominator polynomial of the IIR filter.
    """
    nyquist = 0.5 * fs

    # keep the upper edge safely (5%) below the Nyquist frequency
    if highcut >= nyquist * 0.95:
        highcut = nyquist * 0.95

    band = [lowcut / nyquist, highcut / nyquist]
    return butter(order, band, btype='band')
def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass filter a signal with a Butterworth filter.

    For more details see scipy.signal.butter and scipy.signal.lfilter
    documentation.

    Parameters
    ----------
    data: arraylike
        An N-dimensional input array.
    lowcut: int
        The lowcut filtering value.
    highcut: type
        The highcut filtering value.
    fs: int
        The signal sample rate.
    order: int
        The order of the butter filter.

    Returns
    -------
    arraylike
        An N-dimensional filtered array.
    """
    numer, denom = butter_bandpass(lowcut, highcut, fs, order=order)
    return lfilter(numer, denom, data)
def htk_to_ms(htk_time):
    """
    Convert time in HTK (100 ns) units to 5 ms frames.

    Accepts numbers or numeric strings.
    """
    # use isinstance instead of the old 'type(x)==type("string")' check
    if isinstance(htk_time, str):
        htk_time = float(htk_time)
    return htk_time / 50000.0
def read_htk_label(fname, scale="word", htk_time=True, only_words=False):
    """
    Read HTK label, assume: "start end phone word", where word is optional.
    Convert times from HTK units to 5 ms frames.

    Parameters
    ----------
    fname: string
        The label file name.
    scale: string
        Unused; kept for backward compatibility.
    htk_time: boolean
        Times are in HTK 100 ns units (True) or in seconds (False).
    only_words: boolean
        Unused; kept for backward compatibility.

    Returns
    -------
    dict
        {"segments": [[start, end, label], ...]} plus a "words" key when
        word labels were present.
    """
    import codecs

    # use a context manager; chain the original error instead of a bare except
    try:
        with codecs.open(fname, "r", "utf-8") as f:
            label = f.readlines()
    except IOError as err:
        raise Exception("htk label file %s not found" % fname) from err

    # split lines on whitespace
    label = [line.split() for line in label]

    segments = []
    words = []
    prev_end = 0.0
    prev_start = 0.0
    prev_word = "!SIL"
    word = ""
    for line in label:
        if len(line) == 4 and line[2] == 'skip':
            continue
        word = False
        if len(line) == 3:
            (start, end, segment) = line
            if start == "nan":
                continue
        elif len(line) == 4:
            (start, end, segment, word) = line
        else:
            print("Bad line length:")
            print(line)
            continue

        if htk_time:
            end = htk_to_ms(int(end))
            start = htk_to_ms(int(start))
        else:
            # times in seconds -> 5 ms frames (200 Hz)
            end = float(end)*200
            start = float(start)*200

        # skip zero-length units, likely labelling errors
        if start == end:
            continue
        prev_end = start

        segments.append([int(start), int(end), segment])

        # a word label closes the previous word span
        if word:
            words.append([int(prev_start), int(prev_end), prev_word])
            prev_start = start
            prev_word = word
            word = ""

    # handle the last word too
    if len(label[-1]) == 4:
        words.append([htk_to_ms(float(label[-1][0])), htk_to_ms(float(label[-1][1])), label[-1][3]])

    labs = {}
    if len(words) > 0:
        labs["words"] = words
    labs["segments"] = segments

    return labs
def save_analyses(fname, labels, prominences, boundaries, frame_rate=200, with_header=False):
    """Write prominence/boundary analyses to a tab-separated csv file.

    Parameters
    ----------
    fname: string
        The output csv filename.
    labels: list of tuple (float, float, string)
        List of labels which are lists of 3 elements [start, end, description].
    prominences: type
        description
    boundaries: type
        description
    frame_rate: int
        The speech frame rate used to convert frames to seconds.
    with_header: boolean
        Write the header (True) or not (False) [default: False].
    """
    import os.path

    base = os.path.splitext(os.path.basename(fname))[0]

    if with_header:
        header = ("Basename", "start", "end", "label", "prominence", "boundary")

    # one row per label: basename, times in seconds, label, strengths
    rows = []
    for i in range(len(labels)):
        rows.append((base,
                     "%.3f" % (float(labels[i][0]/frame_rate)),
                     "%.3f" % (float(labels[i][1]/frame_rate)),
                     labels[i][2],
                     "%.3f" % (prominences[i][1]),
                     "%.3f" % (boundaries[i][1])))

    logger.debug("Saving %s with following content:" % fname)
    if with_header:
        logger.debug(header)
    logger.debug(rows)

    import codecs
    with codecs.open(fname, "w", "utf-8") as prom_f:
        if with_header:
            prom_f.write(u"\t".join(header) + u"\n")
        for row in rows:
            prom_f.write(u'\t'.join(row)+u"\n")
def simplify(loma):
    """Reduce each line of maximum amplitude to a (position, strength) pair.

    The position is read from the middle point of the line, the strength
    from its last (highest-scale) point.

    Parameters
    ----------
    loma: type
        description
    """
    pairs = []
    for line in loma:
        # align loma to its position in the middle of the line
        midpoint = line[len(line) // 2][0]
        strength = line[-1][1]
        pairs.append((midpoint, strength))
    return pairs
key=itemgetter(1))[-1]) 152 | else: 153 | max_boundary_loma.append([st+(end-st)/2, 0]) 154 | 155 | # final boundary is not estimated 156 | max_boundary_loma.append((labels[-1][1],1)) 157 | return max_boundary_loma 158 | 159 | 160 | def _get_parent(child_index, parent_diff, parent_indices): 161 | """Private function to find the parent of the given child peak. At child peak index, follow the 162 | slope of parent scale upwards to find parent 163 | 164 | Parameters 165 | ---------- 166 | child_index: int 167 | Index of the current child peak 168 | parent_diff: list of ? 169 | ? 170 | parent_indices: list of int ? 171 | Indices of available parents 172 | 173 | Returns 174 | _______ 175 | int 176 | The parent index or None if there is no parent 177 | """ 178 | for i in range(0, len(parent_indices)): 179 | if (parent_indices[i] > child_index): 180 | if (parent_diff[int(child_index)] > 0): 181 | return parent_indices[i] 182 | else: 183 | if i > 0: 184 | return parent_indices[i-1] 185 | else: 186 | return parent_indices[0] 187 | 188 | if len(parent_indices) > 0: 189 | return parent_indices[-1] 190 | return None 191 | 192 | def get_loma(wavelet_matrix, scales, min_scale, max_scale): 193 | """Get the Line Of Maximum Amplitude (loma) 194 | 195 | Parameters 196 | ---------- 197 | wavelet_matrix: matrix of float 198 | The wavelet matrix 199 | scales: list of int 200 | The list of scales 201 | min_scale: int 202 | The minimum scale 203 | max_scale: int 204 | The maximum scale 205 | 206 | Returns 207 | ------- 208 | list of tuples 209 | ? 210 | 211 | Note 212 | ---- 213 | change this so that one level is done in one chunk, not one parent. 214 | """ 215 | psize = 100.0 216 | min_peak = -10000.0 # minimum peak amplitude to consider. NOTE:this has no meaning unless scales normalized 217 | max_dist = 10 # how far in time to look for parent peaks. NOTE: frame rate and scale dependent, FIXME: how dependent? 
def get_loma(wavelet_matrix, scales, min_scale, max_scale):
    """Get the Line Of Maximum Amplitude (loma)

    Starting from the peaks of the lowest scale, each peak is connected
    to a parent peak on the next scale up; chains of such links form the
    lomas, keyed by their root (lowest-scale) peak index.

    Parameters
    ----------
    wavelet_matrix: matrix of float
        The wavelet matrix
    scales: list of int
        The list of scales
    min_scale: int
        The minimum scale
    max_scale: int
        The maximum scale

    Returns
    -------
    list of tuples
        ?

    Note
    ----
    change this so that one level is done in one chunk, not one parent.
    """
    # NOTE(review): psize appears to be unused
    psize = 100.0
    min_peak = -10000.0 # minimum peak amplitude to consider. NOTE:this has no meaning unless scales normalized
    max_dist = 10 # how far in time to look for parent peaks. NOTE: frame rate and scale dependent, FIXME: how dependent?

    # get peaks from the first scale
    (peaks,indices) = misc.get_peaks(wavelet_matrix[min_scale],min_peak)

    loma=dict()
    root=dict()
    for i in range(0,len(peaks)):
        loma[indices[i]]=[]

        # keep track of roots of each loma
        root[indices[i]] = indices[i]

    for i in range(min_scale+1, max_scale):
        # search radius grows with the scale width
        max_dist = np.sqrt(scales[i])*4

        # find peaks in the parent scale
        (p_peaks,p_indices) = misc.get_peaks(wavelet_matrix[i], min_peak)

        parents = dict(zip(p_indices, p_peaks))

        # find a parent for each child peak
        children = dict()
        for p in p_indices:
            children[p] = []

        parent_diff = np.diff(wavelet_matrix[i],1)
        for j in range(0,len(indices)):
            parent =_get_parent(indices[j], parent_diff, p_indices)
            if parent:
                # only attach the child when the parent is close enough in time
                if abs(parent-indices[j]) < max_dist and peaks[j] > min_peak:# np.std(wavelet_matrix[i])*0.5:
                    children[parent].append([indices[j],peaks[j]])
        peaks=[];indices = []

        # for each parent, select max child

        for p in children:

            if len(children[p]) > 0:
                # maxi[0]: index
                # maxi[1]: peak height
                maxi = sorted(children[p], key=itemgetter(1))[-1]
                indices.append(p)
                # accumulate strength along the line
                peaks.append(maxi[1]+parents[p])

                #append child to correct loma
                loma[root[maxi[0]]].append([maxi[0],maxi[1]+parents[p], i, p])
                root[p] = root[maxi[0]]

    # keep only non-empty lomas, ordered by their root position
    sorted_loma = []
    for k in sorted(loma.keys()):
        if len(loma[k]) > 0:
            sorted_loma.append(loma[k])

    logger.debug(simplify(sorted_loma))
    return sorted_loma
def plot_loma(loma, fig, color='black'):
    """Plot the line of maximum amplitudes (loma)

    Parameters
    ----------
    loma: list of tuple (float, float, int, ?)
        the loma values
    fig: figure
        the figure where the loma are going to be plotted in
    color: string
        the color name/code
    """
    for line in loma:
        for point in line:
            scale_idx = point[2]
            # each segment links the child peak position to its parent
            # position one scale step up; the accumulated strength sets
            # the line width
            fig.plot([point[0], point[3]],
                     [scale_idx - 2, scale_idx - 1],
                     linewidth=point[1], color=color,
                     alpha=0.45, solid_capstyle='round')
difficulties with channels, 24bit files, various dtypes 45 | # pysoundfile appears to mostly work 46 | 47 | data, samplerate = soundfile.read(filename, always_2d=True) 48 | 49 | return (samplerate, data[:, 0].copy(order='C')) 50 | 51 | """Alternative solutions: 52 | # import wavio 53 | # wav = wavio.read(filename) 54 | # print wav.data.shape 55 | # pylab.plot(wav.data[:,0]) 56 | # return (wav.rate, wav.data[:, 0]) 57 | 58 | 59 | import scipy.io.wavfile 60 | try: 61 | return scipy.io.wavfile.read(filename) 62 | except Exception as e: 63 | 64 | print e 65 | """ 66 | 67 | 68 | def write_wav(filename, data, sr, format="WAV"): 69 | """Write audio file using soundfile 70 | 71 | Parameters 72 | ---------- 73 | filename: string 74 | The name of the wave file. 75 | data: 1D arraylike 76 | The audio samples. 77 | sr: int 78 | The sample rate. 79 | format: string 80 | The output audio format (Default value is WAV for wav file). 81 | 82 | """ 83 | 84 | soundfile.write(filename, data, sr, format=format) 85 | 86 | 87 | def resample(waveform, s_sr, t_sr): 88 | """resampling for waveforms, should work also with when source and 89 | target rate ratio is fractional 90 | 91 | Parameters 92 | ---------- 93 | waveform: np.array 94 | speech waveform, mono 95 | s_sr: float 96 | original sample rate 97 | t_sr: float 98 | target sample rate 99 | 100 | returns: resampled waveform as np.array 101 | """ 102 | ratio = fractions.Fraction(int(t_sr), int(s_sr)) 103 | return resample_poly(waveform.astype(float), ratio.numerator, ratio.denominator) 104 | 105 | 106 | def play(utt): 107 | wavfile = utt + ".wav" 108 | wavfile = wavfile.replace(" ", "\ ") 109 | st = 0.2 110 | end = 1 111 | 112 | while (st > 0.01): 113 | try: 114 | pts = ginput(1) 115 | st = pts[0][0] / 200.0 116 | end = 1.0 117 | except: 118 | continue 119 | os.system("play %s trim 0:0:%f 0:0:%f " % (wavfile, st, end)) 120 | 121 | 122 | def match_length(sig_list): 123 | """Reduce length of all signals to a the minimum one. 
def get_peaks(params, threshold=-10):
    """Find the peaks based on the given prosodic parameters.

    Parameters
    ----------
    params: ?
        Prosodic parameters
    threshold: int
        Minimum peak value to keep

    Returns
    -------
    peaks: arraylike
        array of peak values and peak indices
    """
    # local maxima: the sign of the first difference flips from + to -
    maxima_idx = (np.diff(np.sign(np.diff(params))) < 0).nonzero()[0] + 1
    heights = params[maxima_idx]

    keep = heights > threshold
    return np.array([heights[keep], maxima_idx[keep]])
def get_best_scale2(scales, labels):
    """Find the scale whose width is the closest to the average unit length
    represented in the labels.

    Parameters
    ----------
    scales: 1D arraylike
        The scale indices
    labels: list of tuple (float, float, string)
        List of labels which are lists of 3 elements [start, end, description]

    Returns
    -------
    int
        the index of the best scale
    """
    # average unit length over all labels
    total_length = sum(lab[1] - lab[0] for lab in labels)
    mean_length = total_length / len(labels)

    # pick the scale nearest to that mean length
    return np.argmin(np.abs(scales - mean_length))
def normalize_std(params, std=0):
    """Normalize parameters using a z-score paradigm

    Parameters
    ----------
    params: arraylike
        The parameters to normalize.
    std: float
        A given standard deviation. If 0, the standard deviation is
        computed on the params (NaN-aware). (Default: 0)

    Returns
    ------
    arraylike
        the normalized parameters; all zeros when the deviation is
        numerically zero (empty or constant input)
    """
    if std == 0:
        std = np.nanstd(params)

    # guard against an (almost) zero deviation: empty array or all equal
    if std < 0.00001:  # np.isclose([std,0]):
        return np.zeros(len(params))

    centered = params - np.nanmean(params)
    return centered / float(std)
from . import misc, cwt_utils, f0_processing, smooth_and_interp

import scipy.signal
# scipy.ndimage was used below but never imported (it only worked thanks to
# SciPy's lazy submodule loading); import it explicitly.
import scipy.ndimage
from scipy.signal import windows


def _get_f0(spec, energy, min_hz, max_hz, thresh, sil_thresh):
    """Return the frequency bin with maximum energy, if it is over the given
    threshold and the overall energy of the frame is over the silence
    threshold; otherwise return 0 (unvoiced).

    If the bin at roughly half the candidate frequency carries at least half
    of the candidate's energy, the lower bin is preferred (octave-error guard).
    """
    cand = int(min_hz) + np.argmax(spec[int(min_hz):int(max_hz)])
    if spec[cand] > thresh and energy > sil_thresh:
        if cand > 2 * min_hz and spec[int(round(cand / 2.))] > spec[cand] * 0.5:
            return int(round(cand / 2.))
        else:
            return cand
    return 0


def _track_pitch(pic, min_hz=50, max_hz=450, thresh=0.1, energy_thresh=1.0, DEBUG=False):
    """Extract a pitch contour from a time-frequency image.

    The bin with maximum energy per frame is chosen as a first f0 estimate,
    followed by refinement steps based on the assumption of continuity of the
    pitch track.
    """
    pitch = np.zeros(pic.shape[0])

    # calc energy threshold for voicing
    log_energy = np.log(np.sum(pic, axis=1))
    energy_thresh = np.min(smooth_and_interp.smooth(log_energy, 20)) + energy_thresh
    pic_smooth = pic * scipy.ndimage.gaussian_filter(pic, [2, 5])

    # first pass: find the frequency bin with maximal energy per frame
    for i in range(0, pic_smooth.shape[0]):
        pitch[i] = _get_f0(pic_smooth[i], log_energy[i], min_hz, max_hz, thresh, energy_thresh)

    # further passes with soft continuity constraints
    n_iters = 3
    for it in range(0, n_iters):
        smoothed = f0_processing.process(pitch)
        smoothed = smooth_and_interp.smooth(smoothed, int(200. / (it + 1.)))

        # gradually tightening gaussian window centered on the current
        # estimate, to softly constrain the next iteration
        win_len = 800
        g_window = windows.gaussian(win_len, int(np.mean(smoothed) * (1. / (it + 1.) ** 2)))

        for i in range(0, pic.shape[0]):
            window = np.zeros(len(pic_smooth[i]))
            st = int(np.max((0, int(smoothed[i] - win_len))))
            end = int(np.min((int(smoothed[i] + win_len * 0.5), win_len - st)))
            window[st:end] = g_window[win_len - end:]
            pitch[i] = _get_f0(pic_smooth[i] * window, log_energy[i], min_hz, max_hz, thresh, energy_thresh)

    return pitch


def _assign_to_bins(pic, freqs, mags):
    """Accumulate instantaneous-frequency magnitudes into the 1 Hz bins of
    the time-frequency image ``pic`` (frames x bins), in place."""
    for i in range(1, freqs.shape[0] - 1):
        for j in range(0, freqs.shape[1]):
            try:
                pic[j, int(freqs[i, j])] += mags[i, j]
            except (IndexError, ValueError):
                # frequency estimate outside the image, or non-finite: skip
                pass


def inst_freq_pitch_from_wav(utt_wav, min_hz=50, max_hz=400, acorr_weight=10., voicing_thresh=50., DEBUG=False, target_rate=200):
    """Read a wav file and extract its f0 track; see inst_freq_pitch."""
    (fs, wav_form) = misc.read_wav(utt_wav)

    return inst_freq_pitch(wav_form, fs, min_hz, max_hz, acorr_weight, voicing_thresh, DEBUG, target_rate)


def inst_freq_pitch(wav_form, fs, min_hz=50, max_hz=400, acorr_weight=10., voicing_thresh=50., DEBUG=False, target_rate=200):
    """Extract an f0 track from a speech waveform using instantaneous
    frequency calculated from a continuous wavelet transform.

    Returns the tuple (pitch, pic): the pitch track (Hz per frame) and the
    time-frequency image it was tracked from.
    """
    # adjust thresholds: they are empirically set and depend on the number of
    # bins, normalization, smoothing etc.; map the user-facing 0-100 ranges
    # to the internal ones
    voicing_thresh = (voicing_thresh - 50.0) / 100.0
    acorr_weight /= 100.

    # downsample to 4000 Hz and normalize
    sample_rate = 4000
    tmp_wav_form = misc.resample(wav_form, fs, sample_rate)
    tmp_wav_form = misc.normalize_std(tmp_wav_form)

    # init instantaneous frequency pic, with rather low time and frequency
    # resolution for speed; having 1 Hz / bin simplifies the implementation a
    # bit, but treats males and females differently (other values do not work)
    DEC = int(round(sample_rate / target_rate))
    pic = np.zeros(shape=(int(len(tmp_wav_form) / float(DEC)), int(sample_rate / 4.0)))

    # use continuous wavelet transform to get instantaneous frequencies;
    # morlet mother wavelet, 20 scales per octave over six octaves
    s0 = 2. / sample_rate
    dj = 0.05
    J = 120
    dt = 1. / sample_rate
    # periods = [3, 5, 7] would integrate several time/frequency resolutions
    # but is probably too slow to be the default
    periods = [5]
    for p in periods:
        (wavelet_matrix, scales, cwt_freqs) = cwt_utils.cwt_analysis(
            tmp_wav_form, mother_name="morlet", first_scale=s0, num_scales=J,
            scale_distance=dj, apply_coi=False, period=p, frame_rate=sample_rate)

        # instantaneous frequency = d(phase)/dt / (2*pi)
        phase = np.unwrap(np.angle(wavelet_matrix), axis=1)
        freqs = np.abs((np.gradient(phase, dt)[1]) / (2. * np.pi))

        # decimate to the target frame rate
        freqs = scipy.signal.decimate(freqs, DEC, zero_phase=True)
        mags = scipy.signal.decimate(abs(wavelet_matrix), DEC, zero_phase=True)

        # normalize magnitudes to [0, 1]
        mags = (mags - mags.min()) / mags.ptp()

        # construct time-frequency image
        _assign_to_bins(pic, freqs, mags)

    # perform frequency domain autocorrelation to enhance f0.
    # NOTE: the deprecated (and since removed) scipy.ndimage.filters
    # namespace is replaced by the top-level scipy.ndimage one, consistently
    # with _track_pitch above.
    pic = scipy.ndimage.gaussian_filter(pic, [1, 1])

    length = np.min((max_hz * 3, pic.shape[1])).astype(int)

    for i in range(0, pic.shape[0]):  # frame
        acorr1 = np.correlate(pic[i, :length], pic[i, :length], mode='same')
        pic[i, :int(length / 2.)] *= acorr1[int(len(acorr1) / 2.):]

    # generate pitch track from the image
    logger.debug("tracking pitch..")
    pitch = _track_pitch(pic, min_hz, max_hz, voicing_thresh, DEBUG=DEBUG)
    logger.debug("tracking pitch done.")

    return (pitch, pic)


# ---- file: wavelet_prosody_toolkit/prosody_tools/smooth_and_interp.py ----
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
AUTHOR
    - Antti Suni
    - Sébastien Le Maguer

DESCRIPTION
    Module which provides interpolation routines

LICENSE
    See https://github.com/asuni/wavelet_prosody_toolkit/blob/master/LICENSE.txt
"""

# Global/system packages
import sys

# Math/signal processing
import numpy as np
from scipy.io import wavfile
from scipy.signal import decimate
from scipy import interpolate
import pylab

# Logging
import logging
logger = logging.getLogger(__name__)


def remove_bias(params, win_len=300):
    """Remove the slowly varying bias by subtracting a smoothed (window
    length win_len) version of the signal."""
    return params - smooth(params, win_len)
# copied from https://stackoverflow.com/questions/23024950/interp-function-in-python-like-matlab/40346185#40346185
def interpolate_by_factor(vector, factor):
    """
    Interpolate, i.e. upsample, a given 1D vector by a specific interpolation factor.

    :param vector: 1D data vector
    :param factor: factor for interpolation (must be integer)
    :return: interpolated 1D vector by a given factor
    """
    x = np.arange(np.size(vector))
    f = interpolate.interp1d(x, vector)

    x_extended_by_factor = np.linspace(x[0], x[-1],
                                       int(round(np.size(x) * factor)))

    # evaluate the interpolant on the whole extended grid in one vectorized
    # call (the original looped point by point, which is equivalent but slow)
    return f(x_extended_by_factor)


def interpolate_zeros(params, method='pchip', min_val=0):
    """
    Interpolate through the samples equal to min_val (e.g. unvoiced 0 frames).

    :param params: 1D data vector
    :param method: 'pchip' (default), 'spline', or any "kind" accepted by
                   scipy.interpolate.interp1d
    :param min_val: value marking the samples to be replaced
    :return: 1D vector with the marked samples interpolated
    """
    voiced = np.array(params, float)
    # mark the samples to interpolate as NaN (vectorized form of the original
    # element-wise loop)
    voiced[voiced == min_val] = np.nan

    # the interpolators cannot extrapolate, so anchor both endpoints first
    if np.isnan(voiced[-1]):
        voiced[-1] = np.nanmin(voiced)
    if np.isnan(voiced[0]):
        voiced[0] = np.nanmean(voiced)

    not_nan = np.logical_not(np.isnan(voiced))
    indices = np.arange(len(voiced))

    if method == 'spline':
        interp = interpolate.UnivariateSpline(indices[not_nan],
                                              voiced[not_nan],
                                              k=2, s=0)
        # return voiced parts intact
        smoothed = interp(indices)
        for i in range(0, len(smoothed)):
            if not np.isnan(voiced[i]):
                smoothed[i] = params[i]

        return smoothed

    elif method == 'pchip':
        interp = interpolate.pchip(indices[not_nan], voiced[not_nan])
    else:
        interp = interpolate.interp1d(indices[not_nan], voiced[not_nan],
                                      method)
    return interp(indices)


def smooth(params, win, type="HAMMING"):
    """
    Gaussian type smoothing: convolution with a Hamming window (or a
    rectangular one when type != "HAMMING").

    :param params: 1D data vector
    :param win: window length in samples (clamped to the signal length and
                forced odd so the convolution stays centered)
    :param type: "HAMMING" or anything else for a rectangular window
    :return: smoothed vector, same length as params
    """
    win = int(win + 0.5)
    if win >= len(params) - 1:
        win = len(params) - 1

    if win % 2 == 0:
        win += 1

    # mirror-pad the edges to limit boundary artefacts
    s = np.r_[params[win - 1:0:-1], params, params[-1:-win:-1]]

    w = np.hamming(win) if type == "HAMMING" else np.ones(win)

    y = np.convolve(w / w.sum(), s, mode='valid')
    return y[int(win / 2):-int(win / 2)]


def peak_smooth(params, max_iter, win,
                min_win=2, voicing=None, TRACE=False):
    """
    Iterative smoothing while preserving peaks, 'true envelope' -style.

    :param params: 1D data vector
    :param max_iter: number of smoothing iterations
    :param win: initial window length; shrinks exponentially towards min_win
    :param voicing: optional voicing mask; where it is > 0 the original
                    samples are kept untouched
    :param TRACE: if True, plot intermediate results (requires pylab)
    :return: smoothed vector, same length as params
    """
    if voicing is None:
        # the original used a mutable default argument ([]); None is the
        # safe, behaviorally-identical replacement
        voicing = []

    smoothed = np.array(params)
    # exponentially decreasing window sizes, from win down to min_win
    win_reduce = np.exp(np.linspace(np.log(win), np.log(min_win), max_iter))

    if TRACE:
        pylab.ion()
        pylab.plot(params, 'black')

    for i in range(0, max_iter):
        # never drop below the original signal: this is what preserves peaks
        smoothed = np.maximum(params, smoothed)

        if len(voicing) > 0:
            smoothed = smooth(smoothed, int(win + 0.5))
            smoothed[voicing > 0] = params[voicing > 0]
        else:
            smoothed = smooth(smoothed, int(win + 0.5), type='rectangle')

        win = win_reduce[i]

    if TRACE:
        pylab.plot(smoothed, 'red', linewidth=2)
        pylab.show()
    return smoothed