├── .gitignore
├── CODEOWNERS
├── LICENSE.md
├── README.md
├── setup.py
├── tests
│   ├── __init__.py
│   └── unit
│       ├── __init__.py
│       ├── resources
│       │   ├── invalid_gestural_score.txt
│       │   ├── valid_gestural_score.txt
│       │   ├── valid_motor_series.txt
│       │   └── valid_phoneme_sequence.txt
│       ├── test_calculate_tongueroot_automatically.py
│       ├── test_gesture_file_to_audio.py
│       ├── test_gesture_file_to_motor_file.py
│       ├── test_get_constants.py
│       ├── test_get_gesture_duration.py
│       ├── test_get_param_info.py
│       ├── test_get_shape.py
│       ├── test_get_version.py
│       ├── test_motor_file_to_audio_file.py
│       ├── test_motor_to_audio.py
│       ├── test_output
│       │   ├── gesture_file_to_audio.wav
│       │   ├── invalid_phoneme_file_to_gesture_file.txt
│       │   ├── valid_gesture_file_to_motor_file.txt
│       │   ├── valid_motor_file_to_audio_file.wav
│       │   ├── valid_phoneme_file_to_gesture_file.txt
│       │   └── valid_shape.svg
│       ├── test_phoneme_file_to_gesture_file.py
│       ├── test_synth_block.py
│       ├── test_tract_state_to_svg.py
│       ├── test_tract_state_to_transfer_function.py
│       └── test_tract_state_to_tube_state.py
└── vocaltractlab
    ├── __init__.py
    ├── audioprocessing.py
    ├── core.py
    ├── frequency_domain.py
    ├── logo
    │   └── VocalTractLabPythonLogo.svg
    ├── speaker
    │   ├── JD3.speaker
    │   ├── female_12_years_0_months.speaker
    │   ├── female_18_years_0_months.speaker
    │   ├── female_3_years_0_months.speaker
    │   ├── female_6_years_0_months.speaker
    │   ├── male_12_years_0_months.speaker
    │   ├── male_18_years_0_months.speaker
    │   ├── male_3_years_0_months.speaker
    │   └── male_6_years_0_months.speaker
    ├── tube_state.py
    └── utils.py

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
MANIFEST.in

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
/.vs/PyVTL/v16
/.vs/CMake Overview
/.vs
--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
# These owners will be the default owners for everything in
# the repo and will be requested for review when someone opens a pull request.
* @paul-krug
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# VocalTractLab

### Warning
This software is currently in alpha stage. You may encounter bugs, and future updates may not be backwards-compatible.

# Installation

    pip install vocaltractlab

# Description
A Python module that implements the articulatory speech synthesizer VocalTractLab.
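
# Example
A minimal usage sketch based on the functions defined in `vocaltractlab/core.py`; the file paths below are placeholders:

    import vocaltractlab as vtl

    # List the speaker files bundled with the package and load one of them.
    print(vtl.speakers())
    vtl.load_speaker('JD3')

    # Synthesize a WAV file from a motor (vocal tract control parameter) file.
    vtl.motor_to_audio(
        'path/to/motor_data.txt',
        audio_files='my_audio.wav',
    )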
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------



from setuptools import setup



install_requires = [
    'numpy',
    'target-approximation==0.0.4',
    'tools-mp==0.2.0',
    'torch',
    'torchaudio',
    'vocaltractlab-cython==0.0.14.dev1',
]



#CLASSIFIERS = """\
#Development Status :: 3 - Alpha
#Intended Audience :: Science/Research
#Intended Audience :: Developers
#License :: OSI Approved :: GNU General Public License v3 (GPLv3)
#Programming Language :: C++
#Programming Language :: Python
#Programming Language :: Python :: 3
#Programming Language :: Python :: 3.8
#Programming Language :: Python :: 3.9
#Programming Language :: Python :: Implementation :: CPython
#Topic :: Software Development
#Topic :: Scientific/Engineering
#Typing :: Typed
#Operating System :: Microsoft :: Windows
#Operating System :: POSIX
#Operating System :: Unix
#"""

setup_args = dict(
    name='VocalTractLab',
    version='0.5.1',
    description='High-performance articulatory speech synthesis in Python',
    long_description_content_type='text/markdown',
    long_description=open('README.md', encoding='utf-8').read(),
    url='https://github.com/paul-krug/VocalTractLab-Python',
    author='Paul Krug',
    license='GPL-3.0',
    #classifiers = [_f for _f in CLASSIFIERS.split('\n') if _f],
    keywords=[
        'text-to-speech',
        'speech synthesis',
        'articulatory synthesis',
        'vocal tract',
        'speech production',
        'vocoder',
    ],
    packages=['vocaltractlab'],
    install_requires=install_requires,
)

setup(**setup_args)
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paul-krug/VocalTractLab-Python/bb37e1894a790f31d4ef7c19803c177cc095e290/tests/__init__.py
--------------------------------------------------------------------------------
/tests/unit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paul-krug/VocalTractLab-Python/bb37e1894a790f31d4ef7c19803c177cc095e290/tests/unit/__init__.py
--------------------------------------------------------------------------------
/tests/unit/resources/invalid_gestural_score.txt:
--------------------------------------------------------------------------------
[174 lines of XML gestural-score markup; the tags were stripped in this dump and are not recoverable here.]
--------------------------------------------------------------------------------
/tests/unit/resources/valid_gestural_score.txt:
--------------------------------------------------------------------------------
[174 lines of XML gestural-score markup; the tags were stripped in this dump and are not recoverable here.]
--------------------------------------------------------------------------------
/tests/unit/resources/valid_phoneme_sequence.txt:
--------------------------------------------------------------------------------
name = ; duration_s = 0.157573;
name = ?; duration_s = 0.032311;
name = E; duration_s = 0.056973;
name = s; duration_s = 0.086442;
name = k; duration_s = 0.102374;
name = a; duration_s = 0.073007;
name = n; duration_s = 0.043829;
name = h; duration_s = 0.077127;
name = I; duration_s = 0.045877;
name = l; duration_s = 0.066408;
name = f; duration_s = 0.107215;
name = R; duration_s = 0.066136;
name = aI; duration_s = 0.156738;
name = C; duration_s = 0.063610;
name = z; duration_s = 0.082966;
name = aI; duration_s = 0.230612;
name = n; duration_s = 0.124231;
name = v; duration_s = 0.053200;
name = E; duration_s = 0.049495;
name = n; duration_s = 0.034342;
name = m; duration_s = 0.037373;
name = a; duration_s = 0.060102;
name = n; duration_s = 0.079801;
name = v; duration_s = 0.063513;
name = aI; duration_s = 0.279681;
name = s; duration_s = 0.179362;
name = v; duration_s = 0.060292;
name = i:; duration_s = 0.080591;
name = ?; duration_s = 0.029192;
name = aI; duration_s = 0.094086;
name = n; duration_s = 0.065344;
name = ?; duration_s = 0.032229;
name = U; duration_s = 0.081168;
name = n; duration_s = 0.056591;
name = t; duration_s = 0.066186;
name = 6; duration_s = 0.044691;
name = S; duration_s = 0.086280;
name = t; duration_s = 0.064556;
name = a; duration_s = 0.061135;
name = n; duration_s = 0.064506;
name = t; duration_s = 0.059556;
name = g; duration_s = 0.059901;
name = E; duration_s = 0.033798;
name = b; duration_s = 0.088358;
name = aU; duration_s = 0.202452;
name = t; duration_s = 0.061473;
name = v; duration_s = 0.081959;
name = I6; duration_s = 0.183965;
name = t; duration_s = 0.127938;
--------------------------------------------------------------------------------
/tests/unit/test_calculate_tongueroot_automatically.py:
--------------------------------------------------------------------------------
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

import unittest
from vocaltractlab_cython import calculate_tongueroot_automatically, VtlApiError

class TestCalculateTonguerootAutomatically(unittest.TestCase):

    def test_enable_automatic_calculation(self):
        # Test enabling automatic calculation
        try:
            calculate_tongueroot_automatically(True)
            self.assertTrue(True)  # No exception should be raised
        except VtlApiError as e:
            self.fail(f"Failed to enable automatic calculation: {e}")

    def test_disable_automatic_calculation(self):
        # Test disabling automatic calculation
        try:
            calculate_tongueroot_automatically(False)
            self.assertTrue(True)  # No exception should be raised
        except VtlApiError as e:
            self.fail(f"Failed to disable automatic calculation: {e}")

    def test_invalid_input_type(self):
        # Test for invalid input type (non-boolean)
        with self.assertRaises(TypeError):
            calculate_tongueroot_automatically("InvalidInput")
--------------------------------------------------------------------------------
/tests/unit/test_gesture_file_to_audio.py:
--------------------------------------------------------------------------------
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

import os
import unittest
from vocaltractlab_cython import gesture_file_to_audio, VtlApiError
import numpy as np

class TestGestureFileToAudio(unittest.TestCase):

    def __init__(self, methodName: str = "runTest") -> None:
        super().__init__(methodName)
        self.valid_gesture_file = os.path.join(
            os.path.dirname(__file__),
            'resources',
            'valid_gestural_score.txt',
        )
        self.invalid_gesture_file = os.path.join(
            os.path.dirname(__file__),
            'resources',
            'invalid_gestural_score.txt',
        )
        self.audio_file = os.path.join(
            os.path.dirname(__file__),
            'test_output',
            'gesture_file_to_audio.wav',
        )
        os.makedirs(
            os.path.join(
                os.path.dirname(__file__),
                'test_output',
            ),
            exist_ok=True,
        )

    def test_generate_audio_from_gesture_file(self):
        print( 'test_generate_audio_from_gesture_file' )
        # Test generating audio from a valid gesture file
        audio = gesture_file_to_audio(self.valid_gesture_file)
        self.assertIsInstance(audio, np.ndarray)  # Check if audio is a NumPy array

    def test_save_generated_audio(self):
        print( 'test_save_generated_audio' )
        # Test generating audio and saving it to a WAV file
        audio = gesture_file_to_audio(
            self.valid_gesture_file,
            self.audio_file,
        )
        self.assertIsInstance(audio, np.ndarray)  # Check if audio is a NumPy array

    def test_generate_audio_with_verbose_output(self):
        print( 'test_generate_audio_with_verbose_output' )
        # Test generating audio with verbose API output
        audio = gesture_file_to_audio(
            self.valid_gesture_file,
            verbose_api=True,
        )
        self.assertIsInstance(audio, np.ndarray)  # Check if audio is a NumPy array

    def test_invalid_gesture_file(self):
        print( 'test_invalid_gesture_file' )
        # Test generating audio from an invalid gesture file (should raise an exception)
        with self.assertRaises(VtlApiError):
            gesture_file_to_audio( self.invalid_gesture_file )
--------------------------------------------------------------------------------
/tests/unit/test_gesture_file_to_motor_file.py:
--------------------------------------------------------------------------------
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

import os
import unittest
from vocaltractlab_cython import gesture_file_to_motor_file, VtlApiError

class TestGestureFileToMotorFile(unittest.TestCase):

    def __init__(self, methodName: str = "runTest") -> None:
        super().__init__(methodName)
        self.valid_gesture_file = os.path.join(
            os.path.dirname(__file__),
            'resources',
            'valid_gestural_score.txt',
        )
        self.invalid_gesture_file = os.path.join(
            os.path.dirname(__file__),
            'resources',
            'invalid_gestural_score.txt',
        )
        self.valid_motor_file = os.path.join(
            os.path.dirname(__file__),
            'test_output',
            'valid_gesture_file_to_motor_file.txt',
        )
        self.invalid_motor_file = os.path.join(
            os.path.dirname(__file__),
            'test_output',
            'invalid_gesture_file_to_motor_file.txt',
        )
        os.makedirs(
            os.path.join(
                os.path.dirname(__file__),
                'test_output',
            ),
            exist_ok=True,
        )

    def test_generate_motor_file_from_gesture_file(self):
        # Test generating a motor file from a valid gesture file
        gesture_file_to_motor_file(
            self.valid_gesture_file,
            self.valid_motor_file,
        )
        self.assertTrue(os.path.exists(self.valid_motor_file))  # Check if the motor file was generated

    def test_generate_motor_file_invalid_gesture_file(self):
        # Test generating a motor file from an invalid gesture file (should raise an exception)
        with self.assertRaises(VtlApiError):
            gesture_file_to_motor_file(
                self.invalid_gesture_file,
                self.invalid_motor_file,
            )
--------------------------------------------------------------------------------
/tests/unit/test_get_constants.py:
--------------------------------------------------------------------------------
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

import unittest
from vocaltractlab_cython import get_constants, VtlApiError

class TestGetConstants(unittest.TestCase):

    def test_retrieve_constants(self):
        # Test retrieving constants and parameters
        try:
            constants = get_constants()
            self.assertIsInstance(constants, dict)  # Check if constants is a dictionary
            # Check if the expected keys are present in the dictionary
            self.assertTrue("sr_audio" in constants)
            self.assertTrue("sr_internal" in constants)
            self.assertTrue("n_tube_sections" in constants)
            self.assertTrue("n_tract_params" in constants)
            self.assertTrue("n_glottis_params" in constants)
            self.assertTrue("n_samples_per_state" in constants)
        except VtlApiError as e:
            self.fail(f"Failed to retrieve constants: {e}")
        except ValueError as ve:
            self.fail(f"Invalid values retrieved: {ve}")
--------------------------------------------------------------------------------
/tests/unit/test_get_gesture_duration.py:
--------------------------------------------------------------------------------
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

import os
import unittest
from vocaltractlab_cython import get_gesture_duration, VtlApiError, get_constants

class TestGetGestureDuration(unittest.TestCase):

    def test_retrieve_duration_information(self):
        # Test retrieving duration information
        try:
            valid_gesture_file = os.path.join(
                os.path.dirname(__file__),
                'resources',
                'valid_gestural_score.txt',
            )
            duration_info = get_gesture_duration(valid_gesture_file)
            self.assertIsInstance(duration_info, dict)  # Check if duration_info is a dictionary
            # Check if the expected keys are present in the dictionary
            self.assertTrue("n_audio_samples" in duration_info)
            self.assertTrue("n_gesture_samples" in duration_info)
            self.assertTrue("duration" in duration_info)
            # Check if the values are non-negative
            self.assertTrue(duration_info["n_audio_samples"] >= 0)
            self.assertTrue(duration_info["n_gesture_samples"] >= 0)
            self.assertTrue(duration_info["duration"] >= 0.0)
        except VtlApiError as e:
            self.fail(f"Failed to retrieve duration information: {e}")
--------------------------------------------------------------------------------
/tests/unit/test_get_param_info.py:
--------------------------------------------------------------------------------
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

import unittest
from vocaltractlab_cython import get_param_info, VtlApiError, get_constants

class TestGetParamInfo(unittest.TestCase):

    def test_retrieve_tract_params(self):
        # Test retrieving vocal tract parameters
        try:
            vocal_tract_params = get_param_info('tract')
            self.assertIsInstance(vocal_tract_params, list)  # Check if vocal_tract_params is a list
            # Check if the list contains dictionaries with the expected keys
            for param in vocal_tract_params:
                self.assertTrue("name" in param)
                self.assertTrue("description" in param)
                self.assertTrue("unit" in param)
                self.assertTrue("min" in param)
                self.assertTrue("max" in param)
                self.assertTrue("standard" in param)
        except VtlApiError as e:
            self.fail(f"Failed to retrieve vocal tract parameters: {e}")

    def test_retrieve_glottis_params(self):
        # Test retrieving glottis parameters
        try:
            glottis_params = get_param_info('glottis')
            self.assertIsInstance(glottis_params, list)  # Check if glottis_params is a list
            # Check if the list contains dictionaries with the expected keys
            for param in glottis_params:
                self.assertTrue("name" in param)
                self.assertTrue("description" in param)
                self.assertTrue("unit" in param)
                self.assertTrue("min" in param)
                self.assertTrue("max" in param)
                self.assertTrue("standard" in param)
        except VtlApiError as e:
            self.fail(f"Failed to retrieve glottis parameters: {e}")

    def test_invalid_params_argument(self):
        # Test providing an invalid 'params' argument (should raise a ValueError)
        with self.assertRaises(ValueError):
            get_param_info('invalid_params')
--------------------------------------------------------------------------------
/tests/unit/test_get_shape.py:
--------------------------------------------------------------------------------
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

import unittest
import numpy as np
from vocaltractlab_cython import get_shape, VtlApiError, get_constants

class TestGetShape(unittest.TestCase):

    def test_retrieve_vocal_tract_shape(self):
        # Test retrieving vocal tract shape parameters
        try:
            valid_shape_name = "a"
            vocal_tract_shape = get_shape(valid_shape_name, 'tract')
            self.assertIsInstance(vocal_tract_shape, np.ndarray)  # Check if vocal_tract_shape is a NumPy array
            # Check if the shape array has the expected shape (size)
            vtl_constants = get_constants()
            self.assertEqual(vocal_tract_shape.size, vtl_constants["n_tract_params"])
        except VtlApiError as e:
            self.fail(f"Failed to retrieve vocal tract shape parameters: {e}")

    def test_retrieve_glottis_shape(self):
        # Test retrieving glottis shape parameters
        try:
            valid_shape_name = "modal"
            glottis_shape = get_shape(valid_shape_name, 'glottis')
            self.assertIsInstance(glottis_shape, np.ndarray)  # Check if glottis_shape is a NumPy array
            # Check if the shape array has the expected shape (size)
            vtl_constants = get_constants()
            self.assertEqual(glottis_shape.size, vtl_constants["n_glottis_params"])
        except VtlApiError as e:
            self.fail(f"Failed to retrieve glottis shape parameters: {e}")

    def test_invalid_params_argument(self):
        # Test providing an invalid 'params' argument (should raise a ValueError)
        with self.assertRaises(ValueError):
            shape_name = "valid_shape"
            get_shape(shape_name, 'invalid_params')
--------------------------------------------------------------------------------
/tests/unit/test_get_version.py:
--------------------------------------------------------------------------------
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

import unittest
from vocaltractlab_cython import get_version

class TestGetVersion(unittest.TestCase):

    def test_retrieve_version_information(self):
        # Test retrieving version information
        version = get_version()
        self.assertIsInstance(version, str)  # Check if version is a string
        self.assertGreater(len(version), 0)  # Check if the string is not empty
--------------------------------------------------------------------------------
/tests/unit/test_motor_file_to_audio_file.py:
--------------------------------------------------------------------------------
import unittest
import os
from vocaltractlab_cython import motor_file_to_audio_file, VtlApiError

class TestMotorFileToAudioFile(unittest.TestCase):

    def __init__(self, methodName: str = "runTest") -> None:
        super().__init__(methodName)
        self.valid_motor_file = os.path.join(
            os.path.dirname(__file__),
            'resources',
            'valid_motor_series.txt',
        )
        self.invalid_motor_file = os.path.join(
            os.path.dirname(__file__),
            'this_file_does_not_exist',
            'invalid_motor_series.txt',
        )
        self.valid_audio_file = os.path.join(
            os.path.dirname(__file__),
            'test_output',
            'valid_motor_file_to_audio_file.wav',
        )
        self.invalid_audio_file = os.path.join(
            os.path.dirname(__file__),
            'test_output',
            'invalid_motor_file_to_audio_file.wav',
        )
        os.makedirs(
            os.path.join(
                os.path.dirname(__file__),
                'test_output',
            ),
            exist_ok=True,
        )

    def test_valid_conversion(self):
        try:
            # Convert the motor file to an audio file
            motor_file_to_audio_file(
                self.valid_motor_file,
                self.valid_audio_file,
            )
            # Assert that the audio file was generated
            self.assertTrue(os.path.exists(self.valid_audio_file))
        except VtlApiError as e:
            self.fail(f"Conversion failed with error: {e}")

    def test_invalid_motor_file(self):
        # Attempt to convert a non-existent motor file
        with self.assertRaises(FileNotFoundError):
            motor_file_to_audio_file(
                self.invalid_motor_file,
                self.invalid_audio_file,
            )

if __name__ == '__main__':
    unittest.main()
--------------------------------------------------------------------------------
/tests/unit/test_motor_to_audio.py:
--------------------------------------------------------------------------------
import unittest
import os
from vocaltractlab.core import motor_to_audio
from vocaltractlab_cython import VtlApiError

class TestMotorToAudio(unittest.TestCase):

    def __init__(self, methodName: str = "runTest") -> None:
        super().__init__(methodName)
        self.valid_motor_file = os.path.join(
            os.path.dirname(__file__),
            'resources',
            'valid_motor_series.txt',
        )
        self.invalid_motor_file = os.path.join(
            os.path.dirname(__file__),
            'this_file_does_not_exist',
            'invalid_motor_series.txt',
        )
        self.valid_audio_file = os.path.join(
            os.path.dirname(__file__),
            'test_output',
            'valid_motor_file_to_audio_file.wav',
        )
        self.invalid_audio_file = os.path.join(
            os.path.dirname(__file__),
            'test_output',
            'invalid_motor_file_to_audio_file.wav',
        )
        os.makedirs(
            os.path.join(
                os.path.dirname(__file__),
                'test_output',
            ),
            exist_ok=True,
        )

    def test_valid_conversion(self):
        try:
            # Convert the motor file to an audio file
            motor_to_audio(
                self.valid_motor_file,
                self.valid_audio_file,
            )
            # Assert that the audio file was generated
            self.assertTrue(os.path.exists(self.valid_audio_file))
        except VtlApiError as e:
            self.fail(f"Conversion failed with error: {e}")

    def test_invalid_motor_file(self):
        # Attempt to convert a non-existent motor file
        with self.assertRaises(FileNotFoundError):
            motor_to_audio(
                self.invalid_motor_file,
                self.invalid_audio_file,
            )

if __name__ == '__main__':
    unittest.main()
--------------------------------------------------------------------------------
/tests/unit/test_output/gesture_file_to_audio.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paul-krug/VocalTractLab-Python/bb37e1894a790f31d4ef7c19803c177cc095e290/tests/unit/test_output/gesture_file_to_audio.wav
--------------------------------------------------------------------------------
/tests/unit/test_output/invalid_phoneme_file_to_gesture_file.txt:
--------------------------------------------------------------------------------
[158 lines of XML gestural-score markup; the tags were stripped in this dump and are not recoverable here.]
--------------------------------------------------------------------------------
/tests/unit/test_output/valid_motor_file_to_audio_file.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paul-krug/VocalTractLab-Python/bb37e1894a790f31d4ef7c19803c177cc095e290/tests/unit/test_output/valid_motor_file_to_audio_file.wav
--------------------------------------------------------------------------------
/tests/unit/test_output/valid_phoneme_file_to_gesture_file.txt:
--------------------------------------------------------------------------------
[158 lines of XML gestural-score markup; the tags were stripped in this dump and are not recoverable here.]
--------------------------------------------------------------------------------
/tests/unit/test_output/valid_shape.svg:
--------------------------------------------------------------------------------
[21 lines of SVG markup; the tags were stripped in this dump and are not recoverable here.]
--------------------------------------------------------------------------------
/tests/unit/test_phoneme_file_to_gesture_file.py:
--------------------------------------------------------------------------------
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

import os
import unittest
from vocaltractlab_cython import phoneme_file_to_gesture_file, VtlApiError

class TestPhonemeFileToGestureFile(unittest.TestCase):

    def __init__(self, methodName: str = "runTest") -> None:
        super().__init__(methodName)
        self.valid_phoneme_file = os.path.join(
            os.path.dirname(__file__),
            'resources',
            'valid_phoneme_sequence.txt',
        )
        self.invalid_phoneme_file = os.path.join(
            os.path.dirname(__file__),
            'this_file_does_not_exist',
            'invalid_phoneme_sequence.txt',
        )
        self.valid_gesture_file = os.path.join(
            os.path.dirname(__file__),
            'test_output',
            'valid_phoneme_file_to_gesture_file.txt',
        )
        self.invalid_gesture_file = os.path.join(
            os.path.dirname(__file__),
            'test_output',
            'invalid_phoneme_file_to_gesture_file.txt',
        )
        os.makedirs(
            os.path.join(
                os.path.dirname(__file__),
                'test_output',
            ),
            exist_ok=True,
        )

    def test_generate_gestural_score(self):
        # Test generating a gestural score file from a phoneme sequence file
        try:
            phoneme_file_to_gesture_file(
                self.valid_phoneme_file,
                self.valid_gesture_file,
                verbose_api=True,
            )
            self.assertTrue(os.path.exists(self.valid_gesture_file))  # Check if the gesture file was created
        except VtlApiError as e:
            self.fail(f"Failed to generate gestural score file: {e}")

    def test_invalid_phoneme_file(self):
        # Test providing a nonexistent phoneme sequence file (should raise a FileNotFoundError)
        with self.assertRaises(FileNotFoundError):
            phoneme_file_to_gesture_file(
                self.invalid_phoneme_file,
                self.invalid_gesture_file,
            )

if __name__ == '__main__':
    unittest.main()
--------------------------------------------------------------------------------
/tests/unit/test_synth_block.py:
--------------------------------------------------------------------------------
import unittest
import numpy as np
from vocaltractlab_cython import synth_block, get_shape

class TestSynthBlock(unittest.TestCase):

    def test_valid_input(self):
        # Create valid input arrays
        tract_params = get_shape( 'a', params='tract' ).reshape(1, -1)
        glottis_params = get_shape( 'modal', params='glottis' ).reshape(1, -1)

        # Synthesize audio
        audio = synth_block(tract_params, glottis_params, state_samples=48000)

        # Check if the audio array has the expected length
        self.assertTrue(len(audio) == 48000)

    def test_invalid_tract_params_shape(self):
        # Create an invalid input array with incorrect tract parameters shape
        tract_params = np.array([[0.1, 0.2, 0.3, 0.4], [0.4, 0.5, 0.6, 0.7]])

        # Ensure a ValueError is raised
        with self.assertRaises(ValueError):
            synth_block(tract_params, np.array([[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]]))

    def test_invalid_glottis_params_shape(self):
        # Create an invalid input array with incorrect glottis parameters shape
        glottis_params = np.array([[0.7, 0.8], [1.0, 1.1]])

        # Ensure a ValueError is raised
        with self.assertRaises(ValueError):
            synth_block(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]), glottis_params)

    def test_mismatched_number_of_time_steps(self):
        # Create input arrays with different numbers of time steps
        tract_params = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]])
        glottis_params = np.array([[0.7, 0.8, 0.9]])

        # Ensure a ValueError is raised
        with self.assertRaises(ValueError):
            synth_block(tract_params, glottis_params)

if __name__ == '__main__':
    unittest.main()
--------------------------------------------------------------------------------
/tests/unit/test_tract_state_to_svg.py:
--------------------------------------------------------------------------------
import unittest
import numpy as np
import os
from vocaltractlab_cython import tract_state_to_svg, get_shape

class TestTractStateToSvg(unittest.TestCase):

    def __init__(self, methodName: str = "runTest") -> None:
        super().__init__(methodName)
        self.valid_svg_file = os.path.join(
            os.path.dirname(__file__),
            'test_output',
            'valid_shape.svg',
        )
        self.invalid_svg_file = os.path.join(
            os.path.dirname(__file__),
            'test_output',
            'invalid_shape.svg',
        )
        os.makedirs(
            os.path.join(
                os.path.dirname(__file__),
                'test_output',
            ),
            exist_ok=True,
        )

    def test_valid_tract_state(self):
        # Create a valid vocal tract state (matching the number of tract parameters)
        valid_vocal_tract_state = get_shape( 'a', params = 'tract' )

        # Ensure no exception is raised when exporting the SVG
        with self.subTest(test_case="Valid Vocal Tract State"):
            tract_state_to_svg(
                valid_vocal_tract_state,
                self.valid_svg_file,
            )

            # Check if the SVG file was created
            self.assertTrue(os.path.exists(self.valid_svg_file), "SVG file was not created")

    def test_invalid_tract_state(self):
        # Create an invalid vocal tract state (wrong length)
        vocal_tract_state = np.array([0.1, 0.2, 0.3])  # Incorrect length

        # Ensure a ValueError is raised
        with self.subTest(test_case="Invalid Vocal Tract State - Wrong Length"):
            with self.assertRaises(ValueError):
                tract_state_to_svg(vocal_tract_state, self.invalid_svg_file)

    def test_invalid_tract_state_dim(self):
        # Create an invalid vocal tract state (not a 1D array)
        vocal_tract_state = np.array([[0.1, 0.2, 0.3, 0.4, 0.5]])  # 2D array

        # Ensure a ValueError is raised
        with self.subTest(test_case="Invalid Vocal Tract State - Not 1D Array"):
            with self.assertRaises(ValueError):
                tract_state_to_svg(vocal_tract_state, self.invalid_svg_file)

if __name__ == '__main__':
    unittest.main()
--------------------------------------------------------------------------------
/tests/unit/test_tract_state_to_transfer_function.py:
--------------------------------------------------------------------------------
import unittest
import numpy as np
from vocaltractlab_cython import tract_state_to_transfer_function, get_shape

class TestTractStateToTransferFunction(unittest.TestCase):
    def test_valid_tract_state(self):
        # Create a valid vocal tract state (matching the number of tract parameters)
        vocal_tract_state = get_shape( 'a', params = 'tract' )

        # Compute the transfer function
        transfer_function = tract_state_to_transfer_function(vocal_tract_state)

        # Check if the computed transfer function contains magnitude and phase spectra
        with self.subTest(test_case="Valid Vocal Tract State"):
            self.assertTrue("magnitude_spectrum" in transfer_function)
            self.assertTrue("phase_spectrum" in transfer_function)

            # Check if the number of spectrum samples matches the default value
            self.assertEqual(transfer_function["n_spectrum_samples"], 8192)

            # Check if the magnitude spectrum has the expected shape
            self.assertEqual(transfer_function["magnitude_spectrum"].shape[0], 8192)
            self.assertGreater(np.max(transfer_function["magnitude_spectrum"]), 0.0)

    def test_invalid_tract_state_length(self):
        # Create an invalid vocal tract state (wrong length)
        vocal_tract_state = np.array([0.1, 0.2])  # Incorrect length

        # Ensure a ValueError is raised
        with self.subTest(test_case="Invalid Vocal Tract State - Wrong Length"):
            with self.assertRaises(ValueError):
                tract_state_to_transfer_function(vocal_tract_state)

    def test_invalid_tract_state_dim(self):
        # Create an invalid vocal tract state (not a 1D array)
        vocal_tract_state = np.array([[0.1, 0.2, 0.3]])  # 2D array

        # Ensure a ValueError is raised
        with self.subTest(test_case="Invalid Vocal Tract State - Not 1D Array"):
            with self.assertRaises(ValueError):
                tract_state_to_transfer_function(vocal_tract_state)

if __name__ == '__main__':
    unittest.main()
--------------------------------------------------------------------------------
/tests/unit/test_tract_state_to_tube_state.py:
--------------------------------------------------------------------------------
import unittest
import numpy as np
from vocaltractlab_cython import tract_state_to_tube_state, get_shape

class TestTractStateToTubeState(unittest.TestCase):
    def test_valid_tract_state(self):
        # Create a valid vocal tract state (matching the number of tract parameters)
        vocal_tract_state = get_shape( 'a', params = 'tract' )

        # Compute the tube state
        tube_state = tract_state_to_tube_state(vocal_tract_state)

        # Check if the computed tube state contains the expected information
        with self.subTest(test_case="Valid Vocal Tract State"):
            self.assertTrue("tube_length" in tube_state)
            self.assertTrue("tube_area" in tube_state)
            self.assertTrue("tube_articulator" in tube_state)
            self.assertTrue("incisor_position" in tube_state)
            self.assertTrue("tongue_tip_side_elevation" in tube_state)
            self.assertTrue("velum_opening" in tube_state)

            # Check if the tube length and area arrays have the expected shape
            self.assertEqual(tube_state["tube_length"].shape[0], 40)  # Should be 40
            self.assertEqual(tube_state["tube_area"].shape[0], 40)  # Should be 40

            # Check if other values are not None
            self.assertIsNotNone(tube_state["tube_articulator"])
            self.assertIsNotNone(tube_state["incisor_position"])
            self.assertIsNotNone(tube_state["tongue_tip_side_elevation"])
            self.assertIsNotNone(tube_state["velum_opening"])

    def test_invalid_tract_state_length(self):
        # Create an invalid vocal tract state (wrong length)
        vocal_tract_state = np.array([0.1, 0.2])  # Incorrect length

        # Ensure a ValueError is raised
        with self.subTest(test_case="Invalid Vocal Tract State - Wrong Length"):
            with self.assertRaises(ValueError):
                tract_state_to_tube_state(vocal_tract_state)

    def test_invalid_tract_state_dim(self):
        # Create an invalid vocal tract state (not a 1D array)
        vocal_tract_state = np.array([[0.1, 0.2, 0.3]])  # 2D array

        # Ensure a ValueError is raised
        with self.subTest(test_case="Invalid Vocal Tract State - Not 1D Array"):
            with self.assertRaises(ValueError):
                tract_state_to_tube_state(vocal_tract_state)

if __name__ == '__main__':
    unittest.main()
--------------------------------------------------------------------------------
/vocaltractlab/__init__.py:
--------------------------------------------------------------------------------



from vocaltractlab_cython import *
from .core import *
from .audioprocessing import *
from .utils import *
--------------------------------------------------------------------------------
/vocaltractlab/audioprocessing.py:
--------------------------------------------------------------------------------



import os
import torch
import torchaudio
import torchaudio.functional as F
#import torchaudio.transforms as T
import numpy as np
#import librosa
#from scipy import interpolate as ip
#import parselmouth
#import matplotlib.pyplot as plt
from vocaltractlab_cython import get_constants

from typing import Tuple
from typing import Union
from typing import Optional
from typing import Dict
from numpy.typing import ArrayLike

from .utils import strictly_increasing



torch.set_num_threads(1)
torch.multiprocessing.set_sharing_strategy('file_system')

MAX_WAV_VALUE = 32768.0

def to_float(
        x: Union[torch.Tensor, ArrayLike],
        ) -> torch.Tensor:
    """Convert a tensor of ints into floats in the range [-1, 1].

    Args:
        x (Union[torch.Tensor, ArrayLike]): Tensor of ints with arbitrary shape.

    Returns:
        torch.Tensor: Tensor of floats with the same shape as x.
    """
    # Convert to torch.Tensor if needed.
    if not isinstance(x, torch.Tensor):
        x = torch.tensor(x)
    # Convert to float if needed.
    if not x.dtype == torch.float:
        input_dtype = x.dtype
        x = x.float()
        if input_dtype == torch.int:
            x /= MAX_WAV_VALUE
    return x

def to_int(
        waveform,
        ):
    """
    Convert any audio array to a Torch int16 tensor.

    :param waveform: Audio to convert.
    :return: Audio as int16.
    """
    # Convert to torch tensor
    if not isinstance(waveform, torch.Tensor):
        waveform = torch.tensor(waveform)
    # Convert to int16
    if not waveform.dtype == torch.int16:
        waveform = waveform * MAX_WAV_VALUE
        waveform = waveform.short()
    return waveform

def normalize_audio_amplitude(
        x,
        dBFS = -1,
        ):  # Normalization to a peak level in dBFS
    norm_factor = 10**( -1 * dBFS * 0.05 ) - 1
    norm_max = torch.max( torch.abs( x ) )  #, axis=0)
    x /= ( norm_max + ( norm_max * norm_factor ) )
    return x

def postprocess(
        x: ArrayLike,
        sr_out: int,
        dBFS: int = -1,
        file_path: str = None,
        to_numpy: bool = False,
        ) -> np.ndarray:

    vtl_constants = get_constants()

    x = torch.tensor( x ).unsqueeze( 0 )

    if sr_out is None:
        sr_out = vtl_constants[ 'sr_audio' ]
    elif sr_out != vtl_constants[ 'sr_audio' ]:
        x = resample_like_librosa(
            x = x,
            sr_in = vtl_constants[ 'sr_audio' ],
            sr_out = sr_out,
            )

    if dBFS is not None:
        x = normalize_audio_amplitude(
            x = x,
            dBFS = dBFS,
            )

    if file_path is not None:
        if not os.path.exists(
                os.path.dirname( file_path )
                ):
            os.makedirs(
                os.path.dirname( file_path ),
                exist_ok = True,
                )
        torchaudio.save(
            file_path,
            x,
            sr_out,
            )

    if to_numpy:
        x = x.numpy()

    return x

def resample_like_librosa(
        x: Union[torch.Tensor, ArrayLike],
        sr_in: int,
        sr_out: int,
        ) -> torch.Tensor:
    """
    Resample a time series, similar to librosa
    with the 'kaiser_best' resampling method.

    Args:
        x (Union[torch.Tensor, ArrayLike]): Tensor of floats with arbitrary shape.
        sr_in (int): Input sampling rate.
        sr_out (int): Output sampling rate.

    Returns:
        torch.Tensor: Resampled tensor of floats.
    """
    # Convert to torch.Tensor if needed.
    x = to_float(x)
    if sr_in != sr_out:
        x = F.resample(
            waveform = x,
            orig_freq = sr_in,
            new_freq = sr_out,
            lowpass_filter_width = 64,
            rolloff = 0.9475937167399596,
            resampling_method = 'sinc_interp_kaiser',
            beta = 14.769656459379492,
            )
    return x

def hz_to_st(
        frequency_hz,
        reference = 1.0,
        ):
    return 12.0 * np.log( frequency_hz / reference ) / np.log( 2.0 )

def st_to_hz(
        frequency_st,
        reference = 1.0,
        ):
    return reference * pow( 2, frequency_st / 12.0 )

def power_to_db(
        x: np.ndarray,
        ref: float = 1.0,
        eps: float = 1e-10,
        ) -> np.ndarray:

    x = np.asarray(x)

    if eps <= 0:
        raise ValueError(
            "Arg eps must be positive"
            )

    ref_value = np.abs(ref)

    db = 10.0 * np.log10(np.maximum(eps, x))
    db -= 10.0 * np.log10(np.maximum(eps, ref_value))

    return db

def amplitude_to_db(
        x: np.ndarray,
        **kwargs,
        ) -> np.ndarray:
    return power_to_db(
        np.abs(x) ** 2,
        **kwargs,
        )

def audio_to_f0(
        x: Union[str, torch.Tensor, ArrayLike],
        sr_in: int = None,
        upper_f0_limit: int = 400,
        lower_f0_limit: int = 50,
        ):

    try:
        import parselmouth
    except ImportError:
        raise ImportError(
            """
            You need to install the library 'parselmouth'
            to be able to extract F0 from audio.
            """
            )

    # Check if x is a path to a file.
    if isinstance( x, str ):
        x, sr_in = torchaudio.load(x)
    elif sr_in is None:
        raise ValueError(
            "Must provide sr_in if x is not a path."
            )

    # If x is a tensor, convert to numpy array
    if isinstance( x, torch.Tensor ):
        x = x.numpy()

    sr_f0 = 100

    pitch_pm = parselmouth.Sound(
        values=x,
        sampling_frequency=sr_in,
        ).to_pitch()
    pitch_pm_times = pitch_pm.xs()
    pitch_pm_values = pitch_pm.selected_array["frequency"]

    pitch_pm_samples = [
        i + int( pitch_pm_times[0] * sr_f0 )
        for i in range(
            0,
            len(pitch_pm_values),
            )
        ]

    # Sanity check
    if not strictly_increasing(pitch_pm_samples):
        print(pitch_pm_samples)
        print(pitch_pm_times)
        raise ValueError(
            "Pitch samples are not strictly increasing."
            )

    valid_range_indices = np.where(
        (pitch_pm_values >= lower_f0_limit)
        & (pitch_pm_values <= upper_f0_limit)
        )[0]

    xnew = [
        i / sr_f0
        for i in range(
            0,
            int(np.ceil(x.shape[-1] / sr_in * sr_f0)),
            )
        ]
    if valid_range_indices.size == 0:
        ynew = (
            1
            + np.abs(
                np.random.normal(
                    0.0,
                    1.0,
                    size=len(xnew),
                    )
                )
            ) * lower_f0_limit
    else:
        ynew = np.interp(
            xnew,
            pitch_pm_times[valid_range_indices],
            pitch_pm_values[valid_range_indices],
            )

    voiced_flag = np.zeros(len(xnew))
    voiced_flag[
        [i + int(pitch_pm_times[0] * sr_f0) for i in valid_range_indices]
        ] = 1

    f0_times = pitch_pm_times[valid_range_indices]
    f0_values = pitch_pm_values[valid_range_indices]

    f0 = np.array( [
            f0_times,
            f0_values,
        ]
        ).T

    # Concatenate interpolated f0 and voiced flag into a feature of shape (timesteps, 2)
    f0_feature = np.array(
        [
            ynew,
            voiced_flag,
        ]
        ).T

    return f0, f0_feature
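

# ---------------------------------------------------------------------------
# Minimal smoke test for the helpers above. This block is an illustrative
# addition (a sketch, not part of the original module); it only uses
# functions defined in this file and runs via
# `python -m vocaltractlab.audioprocessing`.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    # Hz <-> semitone conversion should round-trip (up to float precision).
    f = 220.0
    assert abs( st_to_hz( hz_to_st( f ) ) - f ) < 1e-9

    # Normalize a random signal to -1 dBFS, then resample 44.1 kHz -> 16 kHz.
    sig = to_float( np.random.uniform( -0.5, 0.5, size=44100 ) )
    sig = normalize_audio_amplitude( sig, dBFS = -1 )
    sig = resample_like_librosa( sig, sr_in = 44100, sr_out = 16000 )
    print( sig.shape, float( sig.abs().max() ) )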
--------------------------------------------------------------------------------
/vocaltractlab/core.py:
--------------------------------------------------------------------------------



import os
import numpy as np

import vocaltractlab_cython as cyvtl
from vocaltractlab_cython.VocalTractLabApi import _close
from vocaltractlab_cython.VocalTractLabApi import _initialize
#from vocaltractlab_cython import active_speaker
from vocaltractlab_cython import get_constants
from vocaltractlab_cython import gesture_file_to_audio
from vocaltractlab_cython import gesture_file_to_motor_file
from vocaltractlab_cython import phoneme_file_to_gesture_file
from vocaltractlab_cython import synth_block
from vocaltractlab_cython import tract_state_to_limited_tract_state
from vocaltractlab_cython import tract_state_to_transfer_function
from vocaltractlab_cython import tract_state_to_tube_state
#from vocaltractlab_cython.exceptions import VTLAPIError
from target_approximation import TargetSeries
from target_approximation.vocaltractlab import MotorSequence
from target_approximation.vocaltractlab import MotorSeries
from target_approximation.vocaltractlab import SupraGlottalSequence
from target_approximation.vocaltractlab import SupraGlottalSeries

from typing import Union, List, Tuple, Dict, Any, Optional, Callable, Iterable, Sequence
from numpy.typing import ArrayLike

from tools_mp import process

from .utils import make_iterable
from .audioprocessing import audio_to_f0
from .audioprocessing import postprocess
from .frequency_domain import TransferFunction
from .tube_state import TubeState


def active_speaker() -> str:
    return cyvtl.active_speaker()

def limit(
        x: Union[
            #MotorSequence,
            MotorSeries,
            #SupraGlottalSequence,
            SupraGlottalSeries,
            str,
            ],
        workers: int = None,
        verbose: bool = True,
        ):
    if isinstance( x, MotorSequence ):
        ms = x.to_series()
        sgs = ms.tract()
    elif isinstance( x, MotorSeries ):
        sgs = x.tract()
    elif isinstance( x, SupraGlottalSequence ):
        sgs = x.to_series()
    elif isinstance( x, str ):
        sgs = SupraGlottalSeries.load( x )
    elif isinstance( x, SupraGlottalSeries ):
        sgs = x
    else:
        raise TypeError(
            f"""
            The specified data type: '{type(x)}'
            is not supported. Type must be one of the following:
            - MotorSequence
            - MotorSeries
            - SupraGlottalSequence
            - SupraGlottalSeries
            - str
            """
            )

    args = [
        dict(
            tract_state = ts,
            )
        for ts in sgs.to_numpy( transpose = False )
        ]

    states = process(
        tract_state_to_limited_tract_state,
        args = args,
        return_data = True,
        workers = workers,
        verbose = verbose,
        mp_threshold = 4,
        initializer = load_speaker,
        initargs = ( cyvtl.active_speaker(), ),
        )

    states = np.array( states )
    lim = SupraGlottalSeries( states )
    if isinstance( x, MotorSeries ):
        # Re-attach the (unlimited) glottal parameters of the input series.
        lim = MotorSeries( lim & x.glottis() )

    return lim

def load_speaker(
        speaker: str,
        ) -> None:
    if not speaker.endswith( '.speaker' ):
        speaker = f"{speaker}.speaker"

    # Check if speaker is a valid file path
    if os.path.exists( speaker ):
        speaker_path = speaker
    else:
        speaker_path = os.path.join(
            os.path.dirname( __file__ ),
            'speaker',
            speaker,
            )
        if not os.path.exists( speaker_path ):
            raise FileNotFoundError(
                f"""
                The specified speaker file path: '{speaker}'
                does not exist.
                """
                )
    _close()
    _initialize( speaker_path )
    return

def speakers() -> List[ str ]:
    speaker_dir = os.path.join(
        os.path.dirname( __file__ ),
        'speaker',
        )
    speakers = [
        os.path.basename(f)
        for f in os.listdir( speaker_dir )
        if f.endswith( '.speaker' )
        ]
    return speakers
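
# Usage sketch for limit() and the speaker helpers above (illustrative; the
# file name is a placeholder): clip each tract state of a motor series into
# the active speaker's valid parameter ranges.
#
#     from target_approximation.vocaltractlab import MotorSeries
#     ms = MotorSeries.load( 'path/to/motor_data.txt', sr = 441 )
#     ms_lim = limit( ms, workers = 2 )  # MotorSeries with limited tract states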

def gesture_to_audio(
        x: Union[ Iterable[ str ], str ],
        audio_files: Optional[ Union[ Iterable[ str ], str ] ],
        normalize_audio: int = -1,
        sr: int = None,
        return_data: bool = False,
        workers: int = None,
        verbose: bool = True,
        ) -> None:

    gesture_files = make_iterable( x )
    audio_files = make_iterable( audio_files )
    if len( gesture_files ) != len( audio_files ):
        raise ValueError(
            f"""
            The number of gesture file paths: {len(gesture_files)}
            does not match the number of audio file paths: {len(audio_files)}.
            """
            )

    args = [
        dict(
            gesture_data = gf,
            audio_file_path = af,
            verbose_api = False,
            normalize_audio = normalize_audio,
            sr = sr,
            )
        for gf, af in zip(
            gesture_files,
            audio_files,
            )
        ]
    audio_data = process(
        _gesture_to_audio,
        args = args,
        return_data = return_data,
        workers = workers,
        verbose = verbose,
        mp_threshold = 4,
        initializer = load_speaker,
        initargs = ( cyvtl.active_speaker(), ),
        )
    return audio_data

def _gesture_to_audio(
        gesture_data,
        audio_file_path,
        verbose_api,
        normalize_audio,
        sr,
        ) -> np.ndarray:
    if isinstance( gesture_data, str ):
        #gesture_file = gesture_data.to_gesture_file( file_path = None )
        gesture_file = gesture_data
    else:
        raise TypeError(
            f"""
            The specified gesture data type: '{type(gesture_data)}'
            is not supported. Type must be one of the following:
            - str
            """
            )
    audio = gesture_file_to_audio(
        ges_file_path = gesture_file,
        audio_file_path = None,
        verbose_api = verbose_api,
        )

    audio = postprocess(
        x = audio,
        sr_out = sr,
        dBFS = normalize_audio,
        file_path = audio_file_path,
        to_numpy = True,
        )

    return audio

def gesture_to_motor(
        gesture_files: Union[ Iterable[ str ], str ],
        motor_files: Optional[ Union[ Iterable[ str ], str ] ],
        workers: int = None,
        verbose: bool = True,
        ) -> None:

    gesture_files = make_iterable( gesture_files )
    motor_files = make_iterable( motor_files )
    if len( gesture_files ) != len( motor_files ):
        raise ValueError(
            f"""
            The number of gesture file paths: {len(gesture_files)}
            does not match the number of motor file paths: {len(motor_files)}.
            """
            )

    args = [
        dict(
            gesture_file = gf,
            motor_file = mf,
            )
        for gf, mf in zip(
            gesture_files,
            motor_files,
            )
        ]
    process(
        gesture_file_to_motor_file,
        args = args,
        return_data = False,
        workers = workers,
        verbose = verbose,
        mp_threshold = 4,
        initializer = load_speaker,
        initargs = ( cyvtl.active_speaker(), ),
        )
    return
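
# End-to-end usage sketch for the gesture helpers above (illustrative; the
# file paths are placeholders). A phoneme sequence file is first compiled
# into a gestural score, which is rendered to a motor file and finally
# synthesized to audio:
#
#     phoneme_file_to_gesture_file( 'utterance_phonemes.txt', 'utterance.ges' )
#     gesture_to_motor( 'utterance.ges', 'utterance_motor.txt' )
#     motor_to_audio( 'utterance_motor.txt', audio_files = 'utterance.wav' )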
286 | 287 | return_data : bool, optional 288 | Flag indicating whether to return the generated audio data. 289 | Default is False. 290 | 291 | workers : int, optional 292 | Number of worker processes for parallel processing. 293 | If None, uses the system's default number of CPU cores. 294 | Default is None. 295 | 296 | verbose : bool, optional 297 | Verbosity mode. If True, displays progress information. 298 | Default is True. 299 | 300 | Returns 301 | ------- 302 | np.ndarray 303 | If 'return_data' is True, returns a NumPy array of the generated audio data. 304 | 305 | Raises 306 | ------ 307 | ValueError 308 | If the number of audio file paths does not match the number of motor data items. 309 | 310 | FileNotFoundError 311 | If the specified motor file path does not exist. 312 | 313 | TypeError 314 | If the specified motor data type is not supported. 315 | Supported types include str, MotorSequence, and MotorSeries. 316 | 317 | Notes 318 | ----- 319 | This function converts motor data into audio signals using the VocalTractLab synthesizer. 320 | It processes the motor parameters and generates corresponding audio signals. 321 | 322 | Examples 323 | -------- 324 | # Example 1: Convert MotorSequence object to audio without saving files 325 | >>> motor_data = MotorSequence(...) # Replace '...' with actual MotorSequence data 326 | >>> audio_data = motor_to_audio(motor_data) 327 | 328 | # Example 2: Convert MotorSeries object to audio and save the files 329 | >>> motor_series = MotorSeries(...) # Replace '...' with actual MotorSeries data 330 | >>> audio_files = ['audio1.wav', 'audio2.wav'] # List of paths to save audio files 331 | >>> motor_to_audio(motor_series, audio_files=audio_files, return_data=False) 332 | 333 | # Example 3: Convert motor data from a file to audio with normalization 334 | >>> motor_file_path = 'path/to/motor_data.csv' # Replace with the actual file path 335 | >>> audio_data = motor_to_audio(motor_file_path, normalize_audio=-6, return_data=True) 336 | """ 337 | 338 | motor_data = make_iterable( motor_data ) 339 | if audio_files is None: 340 | audio_files = [ None ] * len( motor_data ) 341 | else: 342 | audio_files = make_iterable( audio_files ) 343 | if len( audio_files ) != len( motor_data ): 344 | raise ValueError( 345 | f""" 346 | The number of audio file paths: {len(audio_files)} 347 | does not match the number of motor data: {len(motor_data)}. 348 | """ 349 | ) 350 | 351 | args = [ 352 | dict( 353 | motor_data = md, 354 | audio_file_path = audio_file_path, 355 | normalize_audio = normalize_audio, 356 | sr = sr, 357 | ) 358 | for md, audio_file_path in zip( 359 | motor_data, 360 | audio_files, 361 | ) 362 | ] 363 | audio_data = process( 364 | _motor_to_audio, 365 | args = args, 366 | return_data = return_data, 367 | workers = workers, 368 | verbose = verbose, 369 | mp_threshold = 4, 370 | initializer = load_speaker, 371 | initargs = ( cyvtl.active_speaker(), ), 372 | ) 373 | return audio_data 374 | 375 | def _motor_to_audio( 376 | motor_data, 377 | audio_file_path, 378 | normalize_audio, 379 | sr, 380 | state_samples = None, 381 | ): 382 | """ 383 | Generate audio from motor data. 384 | 385 | Parameters 386 | ---------- 387 | motor_data : Union[MotorSequence, MotorSeries, str] 388 | Input data representing motor sequences or series. 389 | Can be a MotorSequence object, MotorSeries object, or a path to a file. 390 | 391 | audio_file_path : Optional[str] 392 | Path to store the generated audio file. If None, audio will not be saved. 
393 | 394 | normalize_audio : int 395 | Amplitude normalization factor. Use -1 for no normalization. 396 | 397 | sr : int 398 | Sampling rate of the output audio. 399 | 400 | state_samples : int, optional 401 | Number of samples for state duration. 402 | If None, defaults to a predefined constant value. 403 | 404 | Returns 405 | ------- 406 | torch.Tensor 407 | A tensor representing the generated audio. 408 | 409 | Raises 410 | ------ 411 | FileNotFoundError 412 | If the specified motor file path does not exist. 413 | 414 | TypeError 415 | If the specified motor data type is not supported. 416 | Supported types include str, MotorSequence, and MotorSeries. 417 | 418 | Notes 419 | ----- 420 | This function generates audio signals from motor data using the VocalTractLab synthesizer. 421 | It processes the motor parameters and synthesizes corresponding audio signals. 422 | 423 | Examples 424 | -------- 425 | # Example 1: Generate audio from MotorSequence object without saving the file 426 | >>> motor_data = MotorSequence(...) # Replace '...' with actual MotorSequence data 427 | >>> audio_tensor = _motor_to_audio(motor_data, audio_file_path=None, normalize_audio=0, sr=44100) 428 | 429 | # Example 2: Generate audio from MotorSeries object and save the audio file 430 | >>> motor_series = MotorSeries(...) # Replace '...' with actual MotorSeries data 431 | >>> audio_path = 'output_audio.wav' # Path to save the audio file 432 | >>> _motor_to_audio(motor_series, audio_file_path=audio_path, normalize_audio=-1, sr=22050) 433 | 434 | # Example 3: Generate audio from a file containing motor data with custom state samples 435 | >>> motor_file_path = 'path/to/motor_data.csv' # Replace with the actual file path 436 | >>> audio_tensor = _motor_to_audio(motor_file_path, audio_file_path=None, normalize_audio=-1, sr=44100, state_samples=120) 437 | """ 438 | 439 | if isinstance( motor_data, str ): 440 | if not os.path.exists( motor_data ): 441 | raise FileNotFoundError( 442 | f""" 443 | The specified motor file path: '{motor_data}' 444 | does not exist. 445 | """ 446 | ) 447 | motor_series = MotorSeries.load( 448 | motor_data, 449 | sr = 441, 450 | ) 451 | elif isinstance( motor_data, MotorSequence ): 452 | motor_series = motor_data.to_series( 453 | sr = 441, 454 | ) 455 | elif isinstance( motor_data, MotorSeries ): 456 | motor_series = motor_data 457 | else: 458 | raise TypeError( 459 | f""" 460 | The specified motor data type: '{type(motor_data)}' 461 | is not supported. Type must be one of the following: 462 | - str 463 | - MotorSequence 464 | - MotorSeries 465 | """ 466 | ) 467 | if motor_series.sr is None: 468 | raise ValueError( 469 | f""" 470 | The specified motor series has no associated sampling 471 | rate and thus cannot be used for audio generation. 472 | Please ensure that the sampling rate is set before 473 | generating audio. 474 | """ )
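# Each motor state is rendered for sr_audio / motor_series.sr audio samples,
# e.g. 44100 / 441 = 100 samples per state at the default state rate used
# above (the actual audio rate comes from get_constants(); 44100 Hz is only
# the typical value).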
475 | 476 | vtl_constants = get_constants() 477 | if state_samples is None: 478 | #state_samples = vtl_constants[ 'n_samples_per_state' ] 479 | state_samples = int( 480 | vtl_constants[ 'sr_audio' ] / motor_series.sr 481 | ) 482 | 483 | 484 | #print( motor_series.to_numpy( part='tract' ) ) 485 | 486 | tract_params = motor_series.tract().to_numpy( transpose = False ) 487 | glottal_params = motor_series.glottis().to_numpy( transpose = False ) 488 | #print( tract_params.shape ) 489 | #print( glottal_params.shape ) 490 | #print( state_samples ) 491 | 492 | 493 | audio = synth_block( 494 | tract_parameters = tract_params, 495 | glottis_parameters = glottal_params, 496 | state_samples = state_samples, 497 | verbose_api = False, 498 | ) 499 | 500 | audio = postprocess( 501 | x = audio, 502 | sr_out = sr, 503 | dBFS = normalize_audio, 504 | file_path = audio_file_path, 505 | to_numpy = True, 506 | ) 507 | 508 | return audio 509 | 510 | def motor_to_transfer_function( 511 | x: Union[ 512 | MotorSequence, 513 | MotorSeries, 514 | SupraGlottalSequence, 515 | SupraGlottalSeries, 516 | str, 517 | ], 518 | n_spectrum_samples: int = 8192, 519 | save_magnitude_spectrum: bool = True, 520 | save_phase_spectrum: bool = True, 521 | workers: int = None, 522 | verbose: bool = True, 523 | ): 524 | if isinstance( x, MotorSequence ): 525 | ms = x.to_series() 526 | sgs = ms.tract() 527 | elif isinstance( x, MotorSeries ): 528 | sgs = x.tract() 529 | elif isinstance( x, SupraGlottalSequence ): 530 | sgs = x.to_series() 531 | elif isinstance( x, str ): 532 | sgs = SupraGlottalSeries.load( x ) 533 | elif isinstance( x, SupraGlottalSeries ): 534 | sgs = x 535 | else: 536 | raise TypeError( 537 | f""" 538 | The specified data type: '{type(x)}' 539 | is not supported. 
Type must be one of the following: 540 | - MotorSequence 541 | - MotorSeries 542 | - SupraGlottalSequence 543 | - SupraGlottalSeries 544 | - str 545 | """ 546 | ) 547 | args = [ 548 | dict( 549 | tract_state = ts, 550 | n_spectrum_samples = n_spectrum_samples, 551 | save_magnitude_spectrum = save_magnitude_spectrum, 552 | save_phase_spectrum = save_phase_spectrum, 553 | ) 554 | for ts in sgs.to_numpy( transpose = False ) 555 | ] 556 | 557 | trf_data = process( 558 | _motor_to_transfer_function, 559 | args = args, 560 | return_data = True, 561 | workers = workers, 562 | verbose = verbose, 563 | mp_threshold = 4, 564 | initializer = load_speaker, 565 | initargs = ( cyvtl.active_speaker(), ), 566 | ) 567 | 568 | return trf_data 569 | 570 | def _motor_to_transfer_function( **kwargs ): 571 | x = tract_state_to_transfer_function( **kwargs ) 572 | x[ 'tract_state' ] = kwargs[ 'tract_state' ] 573 | return TransferFunction.from_dict( x ) 574 | 575 | def motor_to_tube( 576 | x: Union[ 577 | MotorSequence, 578 | MotorSeries, 579 | SupraGlottalSequence, 580 | SupraGlottalSeries, 581 | str, 582 | ], 583 | save_tube_length: bool = True, 584 | save_tube_area: bool = True, 585 | save_tube_articulator: bool = True, 586 | save_incisor_position: bool = True, 587 | save_tongue_tip_side_elevation: bool = True, 588 | save_velum_opening: bool = True, 589 | fast_calculation = True, 590 | workers: int = None, 591 | verbose: bool = True, 592 | ) -> List[ TubeState ]: 593 | 594 | if isinstance( x, MotorSequence ): 595 | ms = x.to_series() 596 | sgs = ms.tract() 597 | elif isinstance( x, MotorSeries ): 598 | sgs = x.tract() 599 | elif isinstance( x, SupraGlottalSequence ): 600 | sgs = x.to_series() 601 | elif isinstance( x, str ): 602 | sgs = SupraGlottalSeries.load( x ) 603 | elif isinstance( x, SupraGlottalSeries ): 604 | sgs = x 605 | else: 606 | raise TypeError( 607 | f""" 608 | The specified data type: '{type(x)}' 609 | is not supported. 
Type must be one of the following: 610 | - MotorSequence 611 | - MotorSeries 612 | - SupraGlottalSequence 613 | - SupraGlottalSeries 614 | - str 615 | """ 616 | ) 617 | 618 | args = [ 619 | dict( 620 | tract_state = ts, 621 | fast_calculation = fast_calculation, 622 | save_tube_length = save_tube_length, 623 | save_tube_area = save_tube_area, 624 | save_tube_articulator = save_tube_articulator, 625 | save_incisor_position = save_incisor_position, 626 | save_tongue_tip_side_elevation = save_tongue_tip_side_elevation, 627 | save_velum_opening = save_velum_opening, 628 | ) 629 | for ts in sgs.to_numpy( transpose = False ) 630 | ] 631 | 632 | tube_data = process( 633 | _motor_to_tube, 634 | args = args, 635 | return_data = True, 636 | workers = workers, 637 | verbose = verbose, 638 | mp_threshold = 4, 639 | initializer = load_speaker, 640 | initargs = ( cyvtl.active_speaker(), ), 641 | ) 642 | 643 | return tube_data 644 | 645 | def _motor_to_tube( **kwargs ): 646 | x = tract_state_to_tube_state( **kwargs ) 647 | x[ 'tract_state' ] = kwargs[ 'tract_state' ] 648 | return TubeState.from_dict( x ) 649 | 650 | def phoneme_to_audio( 651 | x: List[ str ], 652 | gesture_files: List[ str ], 653 | motor_files: List[ str ], 654 | f0_files: Optional[ List[ str ] ] = None, 655 | motor_f0_files: Optional[ List[ str ] ] = None, 656 | audio_files: Optional[ List[ str ] ] = None, 657 | normalize_audio = -1, 658 | sr = None, 659 | return_data = False, 660 | workers: int = None, 661 | verbose: bool = True, 662 | ): 663 | 664 | phoneme_to_motor( 665 | x = x, 666 | gesture_files = gesture_files, 667 | motor_files = motor_files, 668 | workers = workers, 669 | verbose = verbose, 670 | ) 671 | 672 | if f0_files is not None: 673 | if motor_f0_files is None: 674 | ms_data = augment_motor_f0( 675 | motor_files = motor_files, 676 | f0_files = f0_files, 677 | out_files = motor_f0_files, 678 | return_data = True, 679 | workers = workers, 680 | verbose = verbose, 681 | ) 682 | else: 683 | augment_motor_f0( 684 | motor_files = motor_files, 685 | f0_files = f0_files, 686 | out_files = motor_f0_files, 687 | return_data = False, 688 | workers = workers, 689 | verbose = verbose, 690 | ) 691 | ms_data = motor_f0_files 692 | 693 | else: 694 | ms_data = motor_files 695 | 696 | audio_data = motor_to_audio( 697 | motor_data = ms_data, 698 | audio_files = audio_files, 699 | normalize_audio = normalize_audio, 700 | sr = sr, 701 | return_data = return_data, 702 | workers = workers, 703 | verbose = verbose, 704 | ) 705 | 706 | return audio_data 707 | 708 | def phoneme_to_gesture( 709 | x: List[ str ], 710 | gesture_files: List[ str ], 711 | workers: int = None, 712 | verbose: bool = True, 713 | ) -> None: 714 | phoneme_files = make_iterable( x ) 715 | # TODO: implement phn sequence to phn file 716 | gesture_files = make_iterable( gesture_files ) 717 | if len( phoneme_files ) != len( gesture_files ): 718 | raise ValueError( 719 | f""" 720 | The number of phoneme file paths: {len(phoneme_files)} 721 | does not match the number of gesture file paths: {len(gesture_files)}. 
722 | """ 723 | ) 724 | 725 | args = [ 726 | dict( 727 | phoneme_file = pf, 728 | gesture_file = gf, 729 | verbose_api = False, 730 | ) 731 | for pf, gf in zip( 732 | phoneme_files, 733 | gesture_files, 734 | ) 735 | ] 736 | process( 737 | phoneme_file_to_gesture_file, 738 | args = args, 739 | return_data = False, 740 | workers = workers, 741 | verbose = verbose, 742 | mp_threshold = 4, 743 | initializer = load_speaker, 744 | initargs = ( cyvtl.active_speaker(), ), 745 | ) 746 | return 747 | 748 | def phoneme_to_motor( 749 | x: List[ str ], 750 | gesture_files: List[ str ], 751 | motor_files: List[ str ], 752 | workers: int = None, 753 | verbose: bool = True, 754 | ): 755 | 756 | phoneme_to_gesture( 757 | x = x, 758 | gesture_files = gesture_files, 759 | workers = workers, 760 | verbose = verbose, 761 | ) 762 | 763 | gesture_to_motor( 764 | gesture_files = gesture_files, 765 | motor_files = motor_files, 766 | workers = workers, 767 | verbose = verbose, 768 | ) 769 | 770 | return 771 | 772 | def augment_motor_f0( 773 | motor_files: Union[ Iterable[ str ], str ], 774 | f0_files: Union[ Iterable[ str ], str ], 775 | out_files: Optional[ Union[ Iterable[ str ], str ] ] = None, 776 | target_sr: int = 441, 777 | return_data: bool = False, 778 | workers: int = None, 779 | verbose: bool = True, 780 | **kwargs, 781 | ): 782 | motor_files = make_iterable( motor_files ) 783 | f0_files = make_iterable( f0_files ) 784 | if len( motor_files ) != len( f0_files ): 785 | raise ValueError( 786 | f""" 787 | The number of motor file paths: {len(motor_files)} 788 | does not match the number of f0 file paths: {len(f0_files)}. 789 | """ 790 | ) 791 | if out_files is not None: 792 | out_files = make_iterable( out_files ) 793 | if len( motor_files ) != len( out_files ): 794 | raise ValueError( 795 | f""" 796 | The number of motor file paths: {len(motor_files)} 797 | does not match the number of output file paths: {len(out_files)}. 
798 | """ ) 799 | else: 800 | out_files = [ None ] * len( motor_files ) 801 | args = [ 802 | dict( 803 | motor_file = mf, 804 | f0_file = ff, 805 | out_file = of, 806 | target_sr = target_sr, 807 | **kwargs, 808 | ) 809 | for mf, ff, of in zip( 810 | motor_files, 811 | f0_files, 812 | out_files, 813 | ) 814 | ] 815 | 816 | ms_data = process( 817 | _augment_motor_f0, 818 | args = args, 819 | return_data = return_data, 820 | workers = workers, 821 | verbose = verbose, 822 | mp_threshold = 4, 823 | # Don't need to load the speaker for this function 824 | # Function does not use the VocalTractLab API 825 | ) 826 | return ms_data 827 | 828 | def _augment_motor_f0( 829 | motor_file, 830 | f0_file, 831 | out_file, 832 | target_sr, 833 | **kwargs, 834 | ): 835 | ms = MotorSeries.load( motor_file ) 836 | ms.resample( target_sr = target_sr ) 837 | 838 | _, feature = audio_to_f0( f0_file ) 839 | f0 = feature[ :, 0 ] 840 | tgss = TargetSeries( 841 | series = f0, 842 | sr = 100, 843 | tiers = [ 'F0' ], 844 | ) 845 | tgss.resample( target_sr = target_sr ) 846 | ms = ms & tgss 847 | 848 | if out_file is not None: 849 | ms.save( out_file, **kwargs ) 850 | return ms 851 | 852 | 853 | #def _supra_glottal_state_to_svg_str( args ): 854 | # supra_glottal_state = args 855 | # svgStr = ( ' ' * 10000 ).encode() 856 | # constants = get_constants() 857 | # cdef np.ndarray[np.float64_t, ndim = 1] tractParams = np.zeros( 858 | # constants['n_tract_params'], 859 | # dtype = 'float64', 860 | # ) 861 | # tractParams = supra_glottal_state.ravel() 862 | # vtlExportTractSvgToStr( 863 | # &tractParams[0], 864 | # svgStr, 865 | # ) 866 | # return svgStr.decode() -------------------------------------------------------------------------------- /vocaltractlab/frequency_domain.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from vocaltractlab_cython import get_constants 5 | #import target_approximation.utils as PT 6 | from target_approximation.utils import finalize_plot 7 | from target_approximation.utils import get_plot 8 | from target_approximation.utils import get_plot_limits 9 | #import librosa 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | from scipy.signal import find_peaks 13 | 14 | from vocaltractlab.utils import multiple_formatter 15 | from vocaltractlab.audioprocessing import amplitude_to_db 16 | 17 | 18 | 19 | class TransferFunction(): 20 | def __init__( 21 | self, 22 | tract_state: np.ndarray, 23 | magnitude_spectrum: np.ndarray, 24 | phase_spectrum: np.ndarray, 25 | n_spectrum_samples: int, 26 | #name: str = 'transfer_function' 27 | ): 28 | if not isinstance( n_spectrum_samples, int ): 29 | raise ValueError( 30 | f""" 31 | Argument n_spectrum_samples must be an integer 32 | and should be a power of 2, but you passed: 33 | {n_spectrum_samples} 34 | """ 35 | ) 36 | self.constants = get_constants() 37 | self.tract_state = tract_state 38 | self.delta_frequency = self.constants[ 'sr_audio' ] / n_spectrum_samples 39 | max_bin = round( n_spectrum_samples / self.delta_frequency ) 40 | self.n_spectrum_samples = n_spectrum_samples 41 | if isinstance( magnitude_spectrum, np.ndarray ): 42 | self.magnitude_spectrum = magnitude_spectrum[ : max_bin ] 43 | else: 44 | self.magnitude_spectrum = None 45 | if isinstance( phase_spectrum, np.ndarray ): 46 | self.phase_spectrum = phase_spectrum[ : max_bin ] 47 | else: 48 | self.phase_spectrum = None 49 | self.data = dict( 50 | frequency = self.magnitude_spectrum, 51 | phase = self.phase_spectrum, 52 | ) 53 | self.formants = self.get_formants() 54 | self.f1, 
self.f2, self.f3, self.f4 = self.formants 55 | return 56 | 57 | @classmethod 58 | def from_dict( 59 | cls, 60 | x, 61 | ): 62 | return cls( 63 | tract_state = x[ 'tract_state' ], 64 | magnitude_spectrum = x[ 'magnitude_spectrum' ], 65 | phase_spectrum = x[ 'phase_spectrum' ], 66 | n_spectrum_samples = x[ 'n_spectrum_samples' ], 67 | ) 68 | 69 | def get_formants( 70 | self, 71 | peak_distance = 1, 72 | # = 44100, 73 | ): 74 | sr = self.constants[ 'sr_audio' ] 75 | peaks, _ = find_peaks( 76 | self.magnitude_spectrum, 77 | distance = peak_distance, 78 | ) 79 | peaks = [ 80 | peak * sr/self.n_spectrum_samples 81 | for peak in peaks 82 | ] 83 | while peaks and peaks[ 0 ] < 100: 84 | del peaks[ 0 ] 85 | if len( peaks ) < 4: 86 | peaks.extend( [ 87 | None for _ in range( 0, 4 - len( peaks ) ) 88 | ] ) 89 | elif len( peaks ) > 4: 90 | peaks = peaks[ : 4 ] 91 | return peaks 92 | 93 | def plot( self, 94 | parameters = [ 'frequency', 'phase' ], 95 | plot_formants = True, 96 | axs: list = None, 97 | plot_kwargs: list = [ dict( color = 'navy' ), dict( color = 'darkorange' ) ], 98 | **kwargs, 99 | ): #, scale = 'dB' ): 100 | figure, axs = get_plot( n_rows = len( parameters ), axs = axs ) 101 | for index, parameter in enumerate( parameters ): 102 | if parameter == 'frequency': 103 | y = amplitude_to_db( self.data[ parameter ] ) 104 | continuities = [ slice( 0, len(y) ) ] 105 | y_title = 'Intensity [dB]' 106 | #_min = np.min( y ) 107 | #_max = np.max( y ) 108 | #axs[ index ].set( ylim = [ _min - 0.1 * np.abs( _max - _min ), _max + 0.1 * np.abs( _max - _min ) ] ) 109 | axs[ index ].set( ylim = get_plot_limits( y ) ) 110 | axs[ index ].locator_params( axis = 'y', nbins = 4 ) 111 | elif parameter == 'phase': 112 | continuities = [] 113 | y = self.data[ parameter ] 114 | tmp_idx = 0 115 | for idx in range( 0, len(y) - 1 ): 116 | if np.abs( y[idx] - y[idx+1] ) > 1.5: 117 | continuities.append( slice( tmp_idx, idx+1 ) ) 118 | tmp_idx = idx + 1 119 | if tmp_idx != len( y ): 120 | continuities.append( slice( tmp_idx, len( y ) ) ) 121 | 122 | y = self.data[ parameter ] 123 | y_title = 'Phase' 124 | axs[ index ].yaxis.set_major_locator(plt.MultipleLocator(np.pi / 2)) 125 | #axs[ index ].yaxis.set_minor_locator(plt.MultipleLocator(np.pi / 12)) 126 | axs[ index ].yaxis.set_major_formatter(plt.FuncFormatter(multiple_formatter())) 127 | axs[ index ].set( ylim = [ -3.76, 3.76 ] ) 128 | else: 129 | raise ValueError( 'parameters must be frequency and/or phase! Passed values are: {}'.format( parameters ) )
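# The frequency axis below runs from 0 to n_spectrum_samples Hz in steps of
# delta_frequency, matching the truncation length max_bin used in __init__,
# so x and y line up.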
130 | x = np.arange( 0, self.n_spectrum_samples, self.delta_frequency ) 131 | for _slice in continuities: 132 | axs[ index ].plot( x[ _slice ], y[ _slice ], **plot_kwargs[ index ] ) 133 | axs[ index ].set( ylabel = y_title ) 134 | plt.xlabel( 'Frequency [Hz]' ) 135 | if plot_formants: 136 | for formant in self.formants: 137 | if formant is not None: # get_formants() pads with None if fewer than 4 peaks were found 138 | for ax in axs: ax.axvline( formant, color = 'gray', ls = '--' ) 139 | for ax in axs: 140 | ax.label_outer() 141 | finalize_plot( figure, axs, **kwargs ) 142 | return axs -------------------------------------------------------------------------------- /vocaltractlab/logo/VocalTractLabPythonLogo.svg: -------------------------------------------------------------------------------- [SVG markup stripped from this dump; the logo is a vector graphic whose visible text reads "VOCALTRACTLAB (Python)".] -------------------------------------------------------------------------------- /vocaltractlab/speaker/female_6_years_0_months.speaker: -------------------------------------------------------------------------------- [Speaker definition stripped from this dump; the bundled .speaker files are markup descriptions of the vocal tract and glottis configuration and are not reproduced here.]
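A minimal usage sketch of the frequency-domain API above (illustrative only, not part of the original sources; it assumes the core functions are re-exported by the vocaltractlab package and that 'example.motor' is a hypothetical motor file):

    import vocaltractlab as vtl

    # motor_to_transfer_function() accepts a motor file path or a
    # Motor-/SupraGlottal- sequence/series object and returns one
    # TransferFunction per tract state.
    trf_list = vtl.motor_to_transfer_function( 'example.motor' )
    trf = trf_list[ 0 ]
    print( trf.f1, trf.f2, trf.f3, trf.f4 )  # first four formant estimates in Hz
    trf.plot( parameters = [ 'frequency', 'phase' ] )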
-------------------------------------------------------------------------------- /vocaltractlab/tube_state.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from target_approximation.utils import finalize_plot 5 | from target_approximation.utils import get_plot 6 | from target_approximation.utils import get_plot_limits 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | 10 | 11 | 12 | class TubeState(): 13 | def __init__( 14 | self, 15 | tract_state, 16 | tube_length, 17 | tube_area, 18 | tube_articulator, 19 | incisor_position, 20 | tongue_tip_side_elevation, 21 | velum_opening, 22 | ): 23 | self.tract_state = tract_state 24 | self.tube_length = tube_length 25 | self.tube_area = tube_area 26 | self.tube_articulator = tube_articulator 27 | self.incisor_position = incisor_position 28 | self.tongue_tip_side_elevation = tongue_tip_side_elevation 29 | self.velum_opening = velum_opening 30 | self.open_limit = 0.3 # minimum areas >= 0.3 cm^2 count as open 31 | self.tight_limit = 0.001 # areas above 0.001 cm^2 count as tight, below or equal as closed (the API itself reports 0.0001 cm^2 for a full closure) 32 | self.constriction = self.get_constriction_class( 33 | tube_area = np.min( tube_area ), 34 | ) 35 | self.constriction_data = self.get_constriction_threshold_crossings() 36 | return 37 | 38 | @classmethod 39 | def from_dict( 40 | cls, 41 | x, 42 | ): 43 | return cls( 44 | tract_state = x[ 'tract_state' ], 45 | tube_length = x[ 'tube_length' ], 46 | tube_area = x[ 'tube_area' ], 47 | tube_articulator = x[ 'tube_articulator' ], 48 | incisor_position = x[ 'incisor_position' ], 49 | tongue_tip_side_elevation = x[ 'tongue_tip_side_elevation' ], 50 | velum_opening = x[ 'velum_opening' ], 51 | ) 52 | 53 | #def get_constriction( self, return_str = False ): 54 | # constriction_strings = [ 'open', 'tight', 'closed' ] 55 | # min_area = np.min( self.tube_area ) 56 | # constriction = None 57 | # if min_area >= self.open_limit: 58 | # constriction = 0 59 | # elif np.isclose( min_area, 0.15 ): 60 | # constriction = 3 61 | # elif np.isclose( min_area, 0.25 ): 62 | # constriction = 4 63 | # elif min_area > self.tight_limit: 64 | # constriction = 1 65 | # elif np.isclose( min_area, 0.0001 ): 66 | # constriction = 2 67 | # if not return_str: 68 | # return constriction 69 | # else: 70 | # return constriction_strings[ constriction ] 71 | 72 | def get_constriction_class( 73 | self, 74 | tube_area, 75 | ): 76 | constriction = None 77 | if tube_area >= self.open_limit: 78 | constriction = 0 79 | elif np.isclose( tube_area, 0.15 ): 80 | constriction = 3 81 | elif np.isclose( tube_area, 0.25 ): 82 | constriction = 4 83 | elif tube_area > self.tight_limit: 84 | constriction = 1 85 | elif np.isclose( tube_area, 0.0001 ): 86 | constriction = 2 87 | elif tube_area <= self.tight_limit: 88 | constriction = 5 89 | return constriction 90 | 91 | def get_tube_area_function( self ): 92 | tube_x = [ self.tube_length[ 0 ] ] 93 | for length in self.tube_length[ 1: ]: 94 | tube_x.append( tube_x[ -1 ] + length ) 95 | x = np.arange( 0, np.sum( self.tube_length ), 0.01 ) 96 | y = [] 97 | tmp_length = 0 98 | for index, _ in enumerate( self.tube_length ): 99 | for val in x: 100 | if val >= tmp_length: 101 | if val <= tube_x[ index ]: 102 | y.append( self.tube_area[ index ] ) 103 | else: 104 | tmp_length = tube_x[ index ] 105 | break 106 | return np.array( [ x, y ] ).T 107 | 108 | def get_constriction_threshold_crossings( 109 | self, 110 | n_tongue_sections = 8, 111 | ): 112 | 
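        """
        Walk along the tube sections and record where the cross-sectional
        area crosses the 'open' limit (0.3 cm^2) and the 'tight' limit
        (0.001 cm^2). The tongue is additionally split into
        n_tongue_sections tokens (T0, T1, ...) so that each constriction
        can be attributed to a place of articulation. Returns a dict with
        the keys 'n_constrictions', 'tight_constrictions' and
        'close_constrictions'.
        """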
tight_crossings = [] 113 | close_crossings = [] 114 | tight_crossed = False 115 | close_crossed = False 116 | tight_articulators = [] 117 | close_articulators = [] 118 | #for x in tube_area_function: 119 | # y = x[1] 120 | # if tight_crossed == False and y < self.open_limit: 121 | # tight_crossings.append( x ) 122 | # tight_crossed = True 123 | # if tight_crossed == True and y >= self.open_limit: 124 | # tight_crossings.append( x ) 125 | # tight_crossed = False 126 | # if close_crossed == False and y < self.tight_limit: 127 | # close_crossings.append( x ) 128 | # close_crossed = True 129 | # if close_crossed == True and y >= self.tight_limit: 130 | # close_crossings.append( x ) 131 | # close_crossed = False 132 | articulator_token = { 133 | '1': 'T', # tongue 134 | '2': 'I', # lower incisors 135 | '3': 'L', # lower lip 136 | '4': 'O', # other 137 | } 138 | n_tongue = len( [ ar for ar in self.tube_articulator if ar == 1 ] ) 139 | tube_articulator_tokens = [] 140 | tongue_counter = 0 141 | tongue_section = -1 142 | for ar in self.tube_articulator: 143 | #print(ar) 144 | if ar == 1: 145 | if ( tongue_counter % max( 1, round(n_tongue/n_tongue_sections) ) == 0 ) and ( tongue_section < (n_tongue_sections-1) ): 146 | tongue_section += 1 147 | tongue_counter += 1 148 | tube_articulator_tokens.append( 149 | 'T{}'.format( tongue_section ) 150 | ) 151 | else: 152 | tube_articulator_tokens.append( 153 | articulator_token[ str( ar ) ] 154 | ) 155 | self.tube_articulator_tokens = tube_articulator_tokens 156 | 157 | assert len( self.tube_area ) == len( self.tube_length ), 'Not the same length, ta: {}, tl: {}'.format( 158 | len( self.tube_area ), 159 | len( self.tube_length ), 160 | ) 161 | assert len( self.tube_area ) == len( self.tube_articulator_tokens ), 'Not the same length, ta: {}, ar: {}'.format( 162 | len( self.tube_area ), 163 | len( self.tube_articulator_tokens ), 164 | ) 165 | 166 | x = 0 167 | for ta, tl, ar in zip( self.tube_area, self.tube_length, self.tube_articulator_tokens ): 168 | if tight_crossed == False and ta < self.open_limit: 169 | tight_crossings.append( x ) 170 | tight_tb_articulators = [] 171 | tight_tb_articulators.append( 172 | dict( 173 | start = x, 174 | place_of_articulation = ar, 175 | tube_area = ta, 176 | constriction_class = self.get_constriction_class( ta ), 177 | ) 178 | ) 179 | #tight_articulators.append( [ x, ar ] ) 180 | tight_crossed = True 181 | elif tight_crossed == True and ta < self.open_limit: 182 | #tight_articulators.append( [ x, ar ] ) 183 | tight_tb_articulators.append( 184 | dict( 185 | start = x, 186 | place_of_articulation = ar, 187 | tube_area = ta, 188 | constriction_class = self.get_constriction_class( ta ), 189 | ) 190 | ) 191 | elif tight_crossed == True and ta >= self.open_limit: 192 | tight_crossings.append( x ) 193 | #tight_articulators.append( ar ) 194 | tight_articulators.append( tight_tb_articulators ) 195 | tight_crossed = False 196 | if close_crossed == False and ta < self.tight_limit: 197 | close_crossings.append( x ) 198 | #close_articulators.append( [ x, ar ] ) 199 | close_tb_articulators = [] 200 | close_tb_articulators.append( 201 | dict( 202 | start = x, 203 | place_of_articulation = ar, 204 | tube_area = ta, 205 | constriction_class = self.get_constriction_class( ta ), 206 | ) 207 | ) 208 | close_crossed = True 209 | elif close_crossed == True and ta < self.tight_limit: 210 | close_tb_articulators.append( 211 | dict( 212 | start = x, 213 | place_of_articulation = ar, 214 | tube_area = ta, 215 | constriction_class = 
self.get_constriction_class( ta ), 216 | ) 217 | ) 218 | elif close_crossed == True and ta >= self.tight_limit: 219 | close_articulators.append( close_tb_articulators ) 220 | close_crossings.append( x ) 221 | close_crossed = False 222 | x += tl 223 | if tight_crossed == True: 224 | tight_crossings.append( x ) 225 | tight_articulators.append( tight_tb_articulators ) 226 | if close_crossed == True: 227 | close_crossings.append( x ) 228 | close_articulators.append( close_tb_articulators ) 229 | 230 | tight_constrictions = self.get_constriction_info( tight_crossings, tight_articulators ) 231 | close_constrictions = self.get_constriction_info( close_crossings, close_articulators ) 232 | constriction_data = dict( 233 | n_constrictions = len( tight_constrictions ) + len( close_constrictions ) - (1 if len(close_constrictions) > 0 else 0), 234 | tight_constrictions = tight_constrictions, 235 | close_constrictions = close_constrictions, 236 | ) 237 | return constriction_data 238 | 239 | def get_constriction_info( self, threshold_crossings, articulators ): 240 | constrictions = [] 241 | index = 0 242 | ar_id = 0 243 | while index < len( threshold_crossings ) - 1: 244 | #print( articulators ) 245 | constrictions.append( 246 | dict( 247 | start = threshold_crossings[ index ], 248 | end = threshold_crossings[ index + 1 ], 249 | length = threshold_crossings[ index + 1 ] - threshold_crossings[ index ], 250 | articulators = articulators[ ar_id ], 251 | #area = None, 252 | ) 253 | ) 254 | index += 2 255 | ar_id += 1 256 | #if len( threshold_crossings ) % 2 != 0: 257 | # start = threshold_crossings[ len( threshold_crossings ) - 1 ] 258 | # end = np.sum( self.tube_length ) 259 | # print( articulators ) 260 | # constrictions.append( 261 | # dict( 262 | # start = start, 263 | # end = end, 264 | # length = end-start, 265 | # articulators = articulators[ -1 ], 266 | # #area = 0, 267 | # ) 268 | # ) 269 | return constrictions 270 | 271 | def has_precise_constriction( self ): 272 | tube_area_function = self.get_tube_area_function() 273 | # get_constriction_threshold_crossings() returns constriction intervals, so the raw crossing positions are recovered here from their 'start'/'end' entries (boundary constrictions therefore always contribute a pair of crossings). constriction_data = self.get_constriction_threshold_crossings() tight_crossings = [ pos for c in constriction_data[ 'tight_constrictions' ] for pos in ( c[ 'start' ], c[ 'end' ] ) ] close_crossings = [ pos for c in constriction_data[ 'close_constrictions' ] for pos in ( c[ 'start' ], c[ 'end' ] ) ] 274 | threshold_crossings = [ len( tight_crossings ), len( close_crossings ) ] 275 | if self.constriction == 2: 276 | if threshold_crossings not in [ [1,1], [2,2] ]: 277 | return False 278 | elif self.constriction == 1: 279 | if threshold_crossings not in [ [2,0], [1,0] ]: 280 | return False 281 | elif self.constriction == 0: 282 | return False 283 | #if self.constriction_has_local_minimum( x, y, ): 284 | # return False 285 | if threshold_crossings == [ 1, 0 ]: 286 | if tight_crossings[0] <= ( 1 - 0.125 ) * np.max( tube_area_function[ :, 0 ] ): 287 | return False 288 | elif threshold_crossings == [ 1, 1 ]: 289 | if np.abs( (np.max( tube_area_function[ :, 0 ] ) - close_crossings[0]) - (close_crossings[0] - tight_crossings[0]) ) >= 1: 290 | return False 291 | elif threshold_crossings == [ 2, 2 ]: 292 | minimum = 0.5 * ( close_crossings[0] + close_crossings[1] ) 293 | if np.abs( np.abs( minimum - tight_crossings[0] ) - np.abs( tight_crossings[1] - minimum ) ) >= 1: 294 | return False 295 | 296 | 297 | return True 298 | 299 | def plot( self, 300 | axs = None, 301 | **kwargs, 302 | ): 303 | #articulators = { 304 | # '1': 'T',# tongue; 305 | # '2': 'I',#= lower incisors; 306 | # '3': 'L', 307 | # '4': 'O',# = lower lip; 4 = other 308 | #} 309 | figure, axs = get_plot( n_rows = 1, axs = axs ) 310 | tube_area_function = self.get_tube_area_function() 311 | axs[0].set( 
xlabel = 'Tube Length [cm]', ylabel = r'Cross-sectional Area [cm$^2$]' ) 312 | #y = [ val for val in x ] 313 | #x = [ self.tube_length[ 0 ] ] 314 | #for length in self.tube_length[ 1: ]: 315 | # x.append( x[ -1 ] + length ) 316 | axs[0].plot( tube_area_function[ :, 0 ], tube_area_function[ :, 1 ] ) 317 | constriction_data = self.get_constriction_threshold_crossings() 318 | tight_constrictions = constriction_data[ 'tight_constrictions' ] 319 | close_constrictions = constriction_data[ 'close_constrictions' ] 320 | for tight_constriction in tight_constrictions: 321 | #axs[0].scatter( tight_crossing[0], tight_crossing[1], color = 'red', marker = 'x' ) 322 | axs[0].scatter( tight_constriction[ 'start' ], 0.3, color = 'red', marker = 'x' ) 323 | axs[0].scatter( tight_constriction[ 'end' ], 0.3, color = 'red', marker = 'x' ) 324 | axs[0].plot( [tight_constriction[ 'start' ], tight_constriction[ 'start' ] + tight_constriction[ 'length' ] ], [ 0.9 , 0.9 ] ) 325 | for element in tight_constriction[ 'articulators' ]: 326 | axs[0].text( s=element[ 'place_of_articulation' ], x=element[ 'start' ], y = 0.35 ) 327 | for close_constriction in close_constrictions: 328 | #axs[0].scatter( close_crossing[0], close_crossing[1], color = 'green', marker = 'o' ) 329 | #axs[0].scatter( close_crossing[0], 0.001, color = 'green', marker = 'o' ) 330 | axs[0].scatter( close_constriction[ 'start' ], 0.001, color = 'green', marker = 'o' ) 331 | axs[0].scatter( close_constriction[ 'end' ], 0.001, color = 'green', marker = 'o' ) 332 | axs[0].plot( [close_constriction[ 'start' ], close_constriction[ 'start' ] + close_constriction[ 'length' ] ], [ 0.01 , 0.01 ] ) 333 | axs[0].axhline( 0.3, color = 'gray', ls = '--' ) 334 | axs[0].axhline( 0.001, color = 'gray', ls = '-.' ) 335 | axs[0].set( yscale = 'log' ) 336 | finalize_plot( figure, axs, **kwargs ) 337 | #ax.set( xlabel = 'Tube Length [cm]', ylabel = r'Cross-sectional Area [cm$^2$]' ) 338 | return axs -------------------------------------------------------------------------------- /vocaltractlab/utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | from typing import Union, List, Tuple, Dict, Any, Optional, Callable, Iterable, Sequence 8 | from numpy.typing import ArrayLike 9 | 10 | 11 | 12 | def make_iterable( x ): 13 | if isinstance( x, str ) or not isinstance( x, Iterable ): 14 | return [ x ] 15 | return x 16 | 17 | def multiple_formatter( 18 | denominator=2, 19 | number=np.pi, 20 | latex='\\pi', 21 | ): 22 | def gcd(a, b): 23 | while b: 24 | a, b = b, a%b 25 | return a 26 | def _multiple_formatter(x, pos): 27 | den = denominator 28 | num = int(np.rint(den*x/number)) 29 | com = gcd(num,den) 30 | (num,den) = (int(num/com),int(den/com)) 31 | if den==1: 32 | if num==0: 33 | return r'$0$' 34 | if num==1: 35 | return r'$%s$'%latex 36 | elif num==-1: 37 | return r'$-%s$'%latex 38 | else: 39 | return r'$%s%s$'%(num,latex) 40 | else: 41 | if num==1: 42 | return r'$\frac{%s}{%s}$'%(latex,den) 43 | elif num==-1: 44 | return r'$\frac{-%s}{%s}$'%(latex,den) 45 | else: 46 | return r'$\frac{%s%s}{%s}$'%(num,latex,den) 47 | return _multiple_formatter 48 | 49 | class Multiple: 50 | def __init__( 51 | self, 52 | denominator=2, 53 | number=np.pi, 54 | latex='\\pi', 55 | ): 56 | self.denominator = denominator 57 | self.number = number 58 | self.latex = latex 59 | def locator(self): 60 | return plt.MultipleLocator( 61 | self.number / self.denominator 62 | ) 63 | def 
formatter(self): 64 | return plt.FuncFormatter( 65 | multiple_formatter( 66 | self.denominator, 67 | self.number, 68 | self.latex, 69 | ) 70 | ) 71 | 72 | def strictly_increasing( L ): 73 | return all( 74 | x < y 75 | for x, y in zip( 76 | L, 77 | L[1:], 78 | ) 79 | ) --------------------------------------------------------------------------------
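A short end-to-end sketch of the synthesis pipeline defined in core.py (illustrative only, not part of the original sources; the file names are hypothetical placeholders and the core functions are assumed to be re-exported by the package):

    import vocaltractlab as vtl

    vtl.load_speaker( 'JD3' )   # the '.speaker' suffix is appended automatically
    print( vtl.speakers() )     # lists the bundled .speaker definitions

    # Hypothetical input files:
    vtl.gesture_to_audio( 'hello.ges', 'hello.wav' )
    audio = vtl.motor_to_audio( 'hello.motor', return_data = True )  # one waveform per input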