├── .gitignore ├── CHANGELOG.md ├── DEVELOP.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── examples ├── __init__.py ├── estimate_speech_rate.py ├── files │ ├── introduction.TextGrid │ ├── introduction.wav │ ├── tone_split_data.TextGrid │ └── tone_split_data.wav ├── frequency.py ├── split_audio_on_silence.py └── split_audio_on_tone.py ├── matlabScripts ├── detect_syllable_nuclei.m └── nucleus_detection_matlab │ ├── column2rowvec.m │ ├── fu_filter.m │ ├── fu_i_window.m │ ├── fu_locmax.m │ ├── fu_optstruct_init.m │ ├── fu_pause_detector.m │ ├── fu_r2c.m │ ├── fu_rmse.m │ ├── fu_smooth.m │ ├── fu_smooth_binvec.m │ ├── fu_smooth_binvec_sub.m │ ├── fu_sylbnd.m │ ├── fu_sylncl.m │ ├── fu_sylncl_sub.m │ ├── fu_transp.m │ ├── fu_trim_vec.m │ ├── fu_typecount.m │ ├── fu_voicing.m │ ├── fu_voicing_sub.m │ ├── fu_window_bnd.m │ ├── fu_window_vec.m │ ├── fu_zero_crossing_rate.m │ └── row2columnvec.m ├── praatScripts ├── get_pitch_and_intensity.praat └── psolaPitch.praat ├── pyacoustics ├── __init__.py ├── aggregate_features.py ├── intensity_and_pitch │ ├── __init__.py │ └── get_f0.py ├── morph │ ├── __init__.py │ └── intensity_morph.py ├── signals │ ├── __init__.py │ ├── audio_scripts.py │ └── data_fitting.py ├── speech_detection │ ├── __init__.py │ ├── common.py │ ├── naive_vad.py │ ├── naive_vad_efficient.py │ ├── segment_stereo_speech.py │ ├── segment_stereo_speech_efficient.py │ ├── split_on_tone.py │ └── textgrids.py ├── speech_filters │ ├── __init__.py │ └── speech_shaped_noise.py ├── speech_rate │ ├── __init__.py │ ├── dictionary_estimate.py │ └── uwe_sr.py ├── text │ ├── __init__.py │ ├── frequency.py │ └── transcript.py ├── textgrids │ ├── __init__.py │ ├── syllabify_textgrids.py │ └── textgrids.py └── utilities │ ├── __init__.py │ ├── error_utils.py │ ├── filters.py │ ├── matlab.py │ ├── my_math.py │ ├── normalize.py │ ├── sequences.py │ ├── statistics.py │ └── utils.py ├── resources ├── buckeye_counts.txt ├── buckeye_frequency_counts.csv ├── fischer_counts.txt ├── spoken_corpora_frequeny_counts.csv └── switchboard_counts.txt ├── setup.py └── tests ├── __init__.py ├── integration ├── __init__.py └── test_integration.py ├── test_sequences.py └── test_statistics.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | eggs/ 15 | lib/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # PyInstaller 25 | # Usually these files are written by a python script from a template 26 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 27 | *.manifest 28 | *.spec 29 | 30 | *.DS_Store 31 | 32 | *.project 33 | *.pydevproject 34 | 35 | examples/files/*/ 36 | examples/graveyard/* 37 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | # PyAcoustics Changelog 3 | 4 | *PyAcoustics uses semantic versioning (Major.Minor.Patch)* 5 | 6 | Ver 2.0 (July 15, 2023) 7 | - drop support for Python 2.7 8 | 9 | Ver 1.0 (June 7, 2015) 10 | - first public release. 11 | 12 | 13 | ## Features as they are added 14 | 15 | I was not using semantic versioning at the time I added these features and did not bump the version number. 
16 | 17 | Mask speech with speech shaped noise 18 | (March 21, 2016) 19 | 20 | Find syllable nuclei/estimate speech rate using Uwe Reichel's matlab code 21 | (July 29, 2015) 22 | 23 | Find the valley bottom between peaks (July 7th, 2015) 24 | -------------------------------------------------------------------------------- /DEVELOP.md: -------------------------------------------------------------------------------- 1 | 2 | These are development notes for myself 3 | 4 | ## Documentation 5 | 6 | Documentation is generated with the following command: 7 | `pdoc pyacoustics -d google -o docs` 8 | 9 | A live version can be seen with 10 | `pdoc pyacoustics -d google` 11 | 12 | pdoc will read from pyacoustics, as installed on the computer, so you may need to run `pip install .` if you want to generate documentation from a locally edited version of pyacoustics. 13 | 14 | ## Tests 15 | 16 | Tests are run with 17 | 18 | `pytest --cov=pyacoustics tests/` 19 | 20 | ## Release 21 | 22 | Releases are built and deployed with: 23 | 24 | `python setup.py bdist_wheel sdist` 25 | 26 | `twine upload dist/*` 27 | 28 | Don't forget to tag the release. 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This code contains portions with different licenses. Unless otherwise stated 2 | in this document, assume that the code is covered by the MIT license as stated below. 3 | 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2015, 2016, 2017, 2018, 2019, 2020 Tim Mahrt 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all 16 | copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE. 25 | 26 | -------------------------- 27 | 28 | The matlab scripts contained in the folder 'nucleus_detection_matlab' were 29 | written by Uwe Reichel. They are released here under the MIT license with 30 | Uwe Reichel's permission. 31 | 32 | -------------------------- 33 | 34 | The file speech_shaped_noise.py contains code redistributed from the project 35 | pambox. 36 | https://github.com/achabotl/pambox 37 | 38 | The modified portion of the pambox code included in speech_shaped_noise.py is 39 | redistributed with the following license: 40 | 41 | Copyright (c) 2014, Alexandre Chabot-Leclerc 42 | All rights reserved. 
43 | 44 | Redistribution and use in source and binary forms, with or without 45 | modification, are permitted provided that the following conditions are met: 46 | 47 | * Redistributions of source code must retain the above copyright notice, this 48 | list of conditions and the following disclaimer. 49 | 50 | * Redistributions in binary form must reproduce the above copyright notice, 51 | this list of conditions and the following disclaimer in the documentation 52 | and/or other materials provided with the distribution. 53 | 54 | * Neither the name of the Technical University of Denmark nor the names of its 55 | contributors may be used to endorse or promote products derived from 56 | this software without specific prior written permission. 57 | 58 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 59 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 61 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 62 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 64 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 65 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 66 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 67 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 68 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include praatScripts/*.praat -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # pyAcoustics 3 | 4 | [![](https://img.shields.io/badge/license-MIT-blue.svg?)](http://opensource.org/licenses/MIT) 5 | 6 | A collection of Python scripts for extracting and analyzing acoustics from audio files. 7 | 8 | # Table of contents 9 | 1. [Common use cases](#common-use-cases) 10 | 2. [Version history](#version-history) 11 | 3. [Requirements](#requirements) 12 | 4. [Installation](#installation) 13 | 5. [Example usage](#example-usage) 14 | 6. [Citing pyAcoustics](#citing-pyacoustics) 15 | 7. [Acknowledgements](#acknowledgements) 16 | 17 | ## Common Use Cases 18 | 19 | What can you do with this library? 20 | 21 | - Extract pitch and intensity: 22 | 23 | pyacoustics.intensity_and_pitch.praat_pi.getPraatPitchAndIntensity() 24 | 25 | - Extract segments of a wav file: 26 | 27 | pyacoustics.signals.audio_scripts.getSubwav() 28 | 29 | - Perform simple manipulations on wav files: 30 | 31 | pyacoustics.signals.resampleAudio() 32 | 33 | pyacoustics.signals.splitStereoAudio() 34 | 35 | - Split audio files on segments of silence or on pure tones: 36 | 37 | pyacoustics.speech_detection.split_on_tone.splitFileOnTone() 38 | 39 | - Programmatically manipulate the pitch or duration of a file: 40 | 41 | pyacoustics.morph.morph_utils.praat_pitch() 42 | 43 | - Mask speech with speech-shaped noise: 44 | 45 | pyacoustics.speech_filters.speech_shaped_noise.batchMaskSpeakerData() 46 | 47 | - And more! 48 | 49 | 50 | ## Version history 51 | 52 | *PyAcoustics uses semantic versioning (Major.Minor.Patch)* 53 | 54 | Please view [CHANGELOG.md]() for version history.
55 | 56 | 57 | ## Requirements 58 | 59 | Many of the individual features require different packages. If you aren't using those 60 | features, you don't need to install the corresponding dependencies. 61 | 62 | pyacoustics.intensity_and_pitch.praat_pi requires 63 | [praat]() 64 | 65 | pyacoustics.intensity_and_pitch.get_f0 requires the ESPS getF0 function as implemented 66 | by [Snack]() although I recall having difficulty 67 | installing it. 68 | 69 | pyacoustics.speech_rate.dictionary_estimate requires my library 70 | [pysle]() 71 | 72 | pyacoustics.signals.data_fitting requires 73 | [SciPy](), 74 | [NumPy](), and 75 | [scikit-learn]() 76 | 77 | My praatIO library is used extensively and can be downloaded 78 | [here]() 79 | 80 | 81 | ## Installation 82 | 83 | PyAcoustics is on PyPI and can be installed or upgraded from the command-line shell with pip like so: 84 | 85 | python -m pip install pyacoustics --upgrade 86 | 87 | Otherwise, to manually install, after downloading the source from GitHub, from a command-line shell, navigate to the directory containing setup.py and type: 88 | 89 | python setup.py install 90 | 91 | If Python is not in your path, you'll need to enter the full path e.g.: 92 | 93 | C:\Python36\python.exe setup.py install 94 | 95 | 96 | ## Example usage 97 | 98 | See the examples folder for a few real-world examples that use this library. 99 | 100 | - examples/split_audio_on_silence.py 101 | 102 | Detects the presence of speech in a recording based on acoustic 103 | intensity. Everything louder than some threshold specified by 104 | the user is considered speech. 105 | 106 | - examples/split_audio_on_tone.py 107 | 108 | Detects the presence of pure tones in a recording. One can use 109 | this to automatically segment stimuli. Beeps can be played while 110 | the speech is being recorded and then later this tool can 111 | automatically segment the speech, based on the presence of those 112 | tones. 113 | 114 | Also detects speech using a pitch analysis. Most syllables 115 | contain some voicing, so a stream of modulating pitch values 116 | suggests that someone is speaking. This aspect is not extensively 117 | tested but it works well for the example files. 118 | 119 | - examples/estimate_speech_rate.py 120 | 121 | Calculates the speech rate through a matlab script written by 122 | [Uwe Reichel]() 123 | that estimates the location of syllable nuclei. 124 | 125 | ## Citing PyAcoustics 126 | 127 | PyAcoustics is general-purpose code and doesn't need to be cited, 128 | but if you would like to, it can be cited like so: 129 | 130 | Tim Mahrt. PyAcoustics. https://github.com/timmahrt/pyAcoustics, 2016. 131 | 132 | 133 | ## Acknowledgements 134 | 135 | PyAcoustics is an ongoing collection of code with contributions from a 136 | number of projects worked on over several years. Development of various 137 | aspects of PyAcoustics was possible thanks to 138 | NSF grant **IIS 07-03624** 139 | to Jennifer Cole and Mark Hasegawa-Johnson, 140 | NSF grant BCS **12-51343** 141 | to Jennifer Cole, José Hualde, and Caroline Smith, and 142 | NSF grant 143 | **IBSS SMA 14-16791** to Jennifer Cole, Nancy McElwain, and Daniel Berry.
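As a quick, concrete taste of the API, here is a minimal, untested sketch (the folder paths are placeholders) that trims the first second from every wav file in a folder; `findFiles`, `makeDir`, `getSoundFileDuration`, and `extractSubwav` are used just as they are in the bundled examples:

    from os.path import join

    from pyacoustics.signals import audio_scripts
    from pyacoustics.utilities import utils

    inputPath = "/path/to/wavs"  # placeholder input folder
    outputPath = "/path/to/output"  # placeholder output folder
    utils.makeDir(outputPath)

    for name in utils.findFiles(inputPath, filterExt=".wav", stripExt=True):
        wavFN = join(inputPath, name + ".wav")
        duration = audio_scripts.getSoundFileDuration(wavFN)
        # Keep everything from t=1.0s to the end of the file
        audio_scripts.extractSubwav(
            wavFN,
            join(outputPath, name + "_trimmed.wav"),
            1.0,
            duration,
            singleChannelFlag=True,
        )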
144 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/examples/__init__.py -------------------------------------------------------------------------------- /examples/estimate_speech_rate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jul 27, 2015 3 | 4 | @author: tmahrt 5 | 6 | Two examples of how to use uwe_sr with two different types of data and 7 | two different tasks. 8 | 9 | First, it is possible to run this on either whole files or on segments 10 | of a file (here the segment times are extracted from a textgrid but you could 11 | use other input sources). 12 | 13 | Second, in one task, the syllable nuclei are serialized in a textgrid. In the 14 | other task, the speech rate is calculated. 15 | """ 16 | 17 | from os.path import join 18 | 19 | from praatio import textgrid 20 | from praatio import praatio_scripts 21 | 22 | from pyacoustics.signals import audio_scripts 23 | from pyacoustics.speech_rate import uwe_sr 24 | from pyacoustics.utilities import utils 25 | from pyacoustics.utilities import my_math 26 | 27 | 28 | def _runSpeechRateEstimate( 29 | wavPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd=True 30 | ): 31 | uwe_sr.findSyllableNuclei( 32 | wavPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd 33 | ) 34 | 35 | 36 | def _runSpeechRateEstimateOnIntervals( 37 | wavPath, 38 | tgPath, 39 | tierName, 40 | wavTmpPath, 41 | syllableNucleiPath, 42 | matlabEXE, 43 | matlabScriptsPath, 44 | printCmd=True, 45 | outputTGFlag=False, 46 | ): 47 | utils.makeDir(wavTmpPath) 48 | # Split audio files into subsections based on textgrid intervals 49 | for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True): 50 | praatio_scripts.splitAudioOnTier( 51 | join(wavPath, name + ".wav"), 52 | join(tgPath, name + ".TextGrid"), 53 | tierName, 54 | wavTmpPath, 55 | outputTGFlag, 56 | ) 57 | 58 | uwe_sr.findSyllableNuclei( 59 | wavTmpPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd 60 | ) 61 | 62 | 63 | def _addSyllableNucleiToTextgrids( 64 | wavPath, tgPath, tierName, syllableNucleiPath, outputPath 65 | ): 66 | # Add syllable nuclei to textgrids 67 | for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True): 68 | tg = textgrid.openTextgrid( 69 | join(tgPath, name + ".TextGrid"), includeEmptyIntervals=False 70 | ) 71 | entryList = tg.tierDict[tierName].entryList 72 | startTimeList = [entry[0] for entry in entryList] 73 | nucleusSyllableList = uwe_sr.toAbsoluteTime( 74 | name, syllableNucleiPath, startTimeList 75 | ) 76 | flattenedSyllableList = [ 77 | nuclei for sublist in nucleusSyllableList for nuclei in sublist 78 | ] 79 | wavFN = join(wavPath, name + ".wav") 80 | duration = audio_scripts.getSoundFileDuration(wavFN) 81 | 82 | oom = my_math.orderOfMagnitude(len(flattenedSyllableList)) 83 | labelTemplate = "%%0%dd" % (oom + 1) 84 | 85 | entryList = [ 86 | (timestamp, labelTemplate % i) 87 | for i, timestamp in enumerate(flattenedSyllableList) 88 | ] 89 | print(flattenedSyllableList) 90 | tier = textgrid.PointTier("Syllable Nuclei", entryList, 0, duration) 91 | 92 | tgFN = join(tgPath, name + ".TextGrid") 93 | tg = textgrid.openTextgrid(tgFN, includeEmptyIntervals=False) 94 | tg.addTier(tier) 95 | tg.save( 96 | join(outputPath, name + ".TextGrid"), 97 | format="short_textgrid", 98 | includeBlankSpaces=True, 99 | ) 100 | 101 | 102 | def _calculateSyllablesPerSecond(wavPath, syllableNucleiPath): 103 | for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True): 104 | nucleusSyllableList = uwe_sr.toAbsoluteTime( 105 | name, 106 | syllableNucleiPath, 107 | [ 108 | 0, 109 | ], 110 | ) 111 | nucleusSyllableList = [ 112 | nucleus for subList in nucleusSyllableList for nucleus in subList 113 | ] 114 | numSyllables = len(nucleusSyllableList) 115 | wavFN = join(wavPath, name + ".wav") 116 | duration = audio_scripts.getSoundFileDuration(wavFN) 117 | 118 | print("%s - %.02f syllables/second" % (name, numSyllables / float(duration))) 119 | 120 | 121 | def _calculateSyllablesPerSecondForIntervals( 122 | wavPath, tgPath, tierName, syllableNucleiPath 123 | ): 124 | # Calculate the speech rate for each interval in the textgrids 125 | for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True): 126 | tg = textgrid.openTextgrid( 127 | join(tgPath, name + ".TextGrid"), includeEmptyIntervals=False 128 | ) 129 | entryList = tg.tierDict[tierName].entryList 130 | startTimeList = [entry[0] for entry in entryList] 131 | nucleusSyllableList = uwe_sr.toAbsoluteTime( 132 | name, syllableNucleiPath, startTimeList 133 | ) 134 | 135 | speechRateList = [] 136 | for intervalList, entry in utils.safeZip( 137 | [nucleusSyllableList, entryList], enforceLength=True 138 | ): 139 | start, stop = entry[0], entry[1] 140 | syllablesPerSecond = len(intervalList) / (stop - start) 141 | speechRateList.append(str(syllablesPerSecond)) 142 | 143 | print( 144 | "%s - %s (syllables/second for each interval)" 145 | % (name, ",".join(speechRateList)) 146 | ) 147 | 148 | 149 | def markupTextgridWithSyllableNuclei( 150 | wavPath, 151 | tgPath, 152 | tierName, 153 | wavTmpPath, 154 | syllableNucleiPath, 155 | matlabEXE, 156 | matlabScriptsPath, 157 | outputPath, 158 | printCmd=True, 159 | outputTGFlag=False, 160 | ): 161 | utils.makeDir(outputPath) 162 | 163 | # This call can be commented out if you run the matlab code directly instead; 164 | # in that case, start from the next line 165 | _runSpeechRateEstimateOnIntervals( 166 | wavPath, 167 | tgPath, 168 | tierName, 169 | wavTmpPath, 170 | syllableNucleiPath, 171 | matlabEXE, 172 | matlabScriptsPath, 173 | printCmd, 174 | outputTGFlag, 175 | ) 176 | 177 | _addSyllableNucleiToTextgrids( 178 | wavPath, tgPath, tierName, syllableNucleiPath, outputPath 179 | ) 180 | 181 | _calculateSyllablesPerSecondForIntervals( 182 | wavPath, tgPath, tierName, syllableNucleiPath 183 | ) 184 | 185 | 186 | def getSpeechRateForIntervals( 187 | wavPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd=True 188 | ): 189 | # This call can be commented out if you run the matlab code directly instead; 190 | # in that case, start from the next line 191 | _runSpeechRateEstimate( 192 | wavPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd 193 | ) 194 | 195 | _calculateSyllablesPerSecond(wavPath, syllableNucleiPath) 196 | 197 | 198 | if __name__ == "__main__": 199 | _rootDir = "/Users/tmahrt/Dropbox/workspace/pyAcoustics/examples/files" 200 | _wavPath = _rootDir 201 | _syllableNucleiPath = join(_rootDir, "syllableNuclei_portions") 202 | _matlabEXE = "/Applications/MATLAB_R2014a.app/bin/matlab" 203 | _matlabScriptsPath = "/Users/tmahrt/Dropbox/workspace/pyAcoustics/" "matlabScripts" 204 | 205 | # getSpeechRateForIntervals(_wavPath, _syllableNucleiPath, _matlabEXE, 206 | # _matlabScriptsPath) 207 | 208 | _wavTmpPath = join(_wavPath,
"subset_wav_files") 209 | _tgPath = _rootDir 210 | _tierName = "utterances" 211 | _syllableNucleiPath = join(_rootDir, "syllableNuclei_whole") 212 | _outputPath = join(_rootDir, "textgrids_w_syllable_nucleus_markings") 213 | 214 | markupTextgridWithSyllableNuclei( 215 | _wavPath, 216 | _tgPath, 217 | _tierName, 218 | _wavTmpPath, 219 | _syllableNucleiPath, 220 | _matlabEXE, 221 | _matlabScriptsPath, 222 | _outputPath, 223 | ) 224 | -------------------------------------------------------------------------------- /examples/files/introduction.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile short" 2 | "TextGrid" 3 | 4 | 0.0 5 | 17.516375 6 | 7 | 3 8 | "IntervalTier" 9 | "utterances" 10 | 0.0 11 | 17.516375 12 | 17 13 | 0.0 14 | 0.181470716579 15 | "" 16 | 0.181470716579 17 | 0.687915993749 18 | "こんにちは" 19 | 0.687915993749 20 | 1.30596645968 21 | "" 22 | 1.30596645968 23 | 3.32877817967 24 | "私は、ティム·マートです" 25 | 3.32877817967 26 | 3.93472274005 27 | "" 28 | 3.93472274005 29 | 5.33896478585 30 | "私は学生です" 31 | 5.33896478585 32 | 6.04797904058 33 | "" 34 | 6.04797904058 35 | 7.67648659987 36 | "日本語を勉強します。" 37 | 7.67648659987 38 | 8.03355270487 39 | "" 40 | 8.03355270487 41 | 10.2762026369 42 | "この夏は日本にいきました" 43 | 10.2762026369 44 | 10.8441923856 45 | "" 46 | 10.8441923856 47 | 13.7353684085 48 | "東京行きましたも四国行きました" 49 | 13.7353684085 50 | 14.3480198798 51 | "" 52 | 14.3480198798 53 | 15.186091973 54 | "楽しかった" 55 | 15.186091973 56 | 15.9461478394 57 | "" 58 | 15.9461478394 59 | 17.4539965371 60 | "ラーメン大好きです" 61 | 17.4539965371 62 | 17.516375 63 | "" 64 | "IntervalTier" 65 | "words" 66 | 0.0 67 | 17.516375 68 | 21 69 | 0.0 70 | 0.181470716579 71 | "" 72 | 0.181470716579 73 | 0.687915993749 74 | "こんにちは" 75 | 0.687915993749 76 | 1.30596645968 77 | "" 78 | 1.30596645968 79 | 2.02596645968 80 | "私は" 81 | 2.02596645968 82 | 3.32877817967 83 | "ティムマートです" 84 | 3.32877817967 85 | 3.93472274005 86 | "" 87 | 3.93472274005 88 | 5.33896478585 89 | "私は学生です" 90 | 5.33896478585 91 | 6.04797904058 92 | "" 93 | 6.04797904058 94 | 7.67648659987 95 | "日本語を勉強します" 96 | 7.67648659987 97 | 8.03355270487 98 | "" 99 | 8.03355270487 100 | 8.22355270487 101 | "この" 102 | 8.22355270487 103 | 8.90355270487 104 | "夏は" 105 | 8.90355270487 106 | 10.2762026369 107 | "日本にいきました" 108 | 10.2762026369 109 | 10.8441923856 110 | "" 111 | 10.8441923856 112 | 12.5541923856 113 | "東京行きましたも" 114 | 12.5541923856 115 | 13.7353684085 116 | "四国行きました" 117 | 13.7353684085 118 | 14.3480198798 119 | "" 120 | 14.3480198798 121 | 15.186091973 122 | "楽しかった" 123 | 15.186091973 124 | 15.9461478394 125 | "" 126 | 15.9461478394 127 | 17.4539965371 128 | "ラーメン大好きです" 129 | 17.4539965371 130 | 17.516375 131 | "" 132 | "IntervalTier" 133 | "phones" 134 | 0.0 135 | 17.516375 136 | 159 137 | 0.0 138 | 0.181470716579 139 | "" 140 | 0.181470716579 141 | 0.201470716579 142 | "k" 143 | 0.201470716579 144 | 0.231470716579 145 | "o" 146 | 0.231470716579 147 | 0.261470716579 148 | "N" 149 | 0.261470716579 150 | 0.291470716579 151 | "n" 152 | 0.291470716579 153 | 0.361470716579 154 | "i" 155 | 0.361470716579 156 | 0.421470716579 157 | "ch" 158 | 0.421470716579 159 | 0.451470716579 160 | "i" 161 | 0.451470716579 162 | 0.541470716579 163 | "w" 164 | 0.541470716579 165 | 0.687915993749 166 | "a" 167 | 0.687915993749 168 | 1.30596645968 169 | "" 170 | 1.30596645968 171 | 1.32596645968 172 | "w" 173 | 1.32596645968 174 | 1.35596645968 175 | "a" 176 | 1.35596645968 177 | 1.40596645968 178 | "t" 179 | 
1.40596645968 180 | 1.45596645968 181 | "a" 182 | 1.45596645968 183 | 1.57596645968 184 | "sh" 185 | 1.57596645968 186 | 1.66596645968 187 | "i" 188 | 1.66596645968 189 | 1.88596645968 190 | "w" 191 | 1.88596645968 192 | 2.02596645968 193 | "a" 194 | 2.02596645968 195 | 2.15596645968 196 | "t" 197 | 2.15596645968 198 | 2.28596645968 199 | "i" 200 | 2.28596645968 201 | 2.32596645968 202 | "m" 203 | 2.32596645968 204 | 2.51596645968 205 | "u" 206 | 2.51596645968 207 | 2.69596645968 208 | "m" 209 | 2.69596645968 210 | 2.82596645968 211 | "a:" 212 | 2.82596645968 213 | 2.88596645968 214 | "t" 215 | 2.88596645968 216 | 2.92596645968 217 | "o" 218 | 2.92596645968 219 | 2.98596645968 220 | "d" 221 | 2.98596645968 222 | 3.05596645968 223 | "e" 224 | 3.05596645968 225 | 3.26596645968 226 | "s" 227 | 3.26596645968 228 | 3.32877817967 229 | "u" 230 | 3.32877817967 231 | 3.93472274005 232 | "" 233 | 3.93472274005 234 | 3.95472274005 235 | "w" 236 | 3.95472274005 237 | 3.98472274005 238 | "a" 239 | 3.98472274005 240 | 4.02472274005 241 | "t" 242 | 4.02472274005 243 | 4.06472274005 244 | "a" 245 | 4.06472274005 246 | 4.14472274005 247 | "sh" 248 | 4.14472274005 249 | 4.18472274005 250 | "i" 251 | 4.18472274005 252 | 4.36472274005 253 | "w" 254 | 4.36472274005 255 | 4.49472274005 256 | "a" 257 | 4.49472274005 258 | 4.64472274005 259 | "g" 260 | 4.64472274005 261 | 4.70472274005 262 | "a" 263 | 4.70472274005 264 | 4.73472274005 265 | "k" 266 | 4.73472274005 267 | 4.76472274005 268 | "u" 269 | 4.76472274005 270 | 4.89472274005 271 | "s" 272 | 4.89472274005 273 | 4.93472274005 274 | "e" 275 | 4.93472274005 276 | 4.96472274005 277 | "i" 278 | 4.96472274005 279 | 5.02472274005 280 | "d" 281 | 5.02472274005 282 | 5.11472274005 283 | "e" 284 | 5.11472274005 285 | 5.27472274005 286 | "s" 287 | 5.27472274005 288 | 5.33896478585 289 | "u" 290 | 5.33896478585 291 | 6.04797904058 292 | "" 293 | 6.04797904058 294 | 6.06797904058 295 | "n" 296 | 6.06797904058 297 | 6.10797904058 298 | "i" 299 | 6.10797904058 300 | 6.16797904058 301 | "h" 302 | 6.16797904058 303 | 6.22797904058 304 | "o" 305 | 6.22797904058 306 | 6.28797904058 307 | "N" 308 | 6.28797904058 309 | 6.34797904058 310 | "g" 311 | 6.34797904058 312 | 6.37797904058 313 | "o" 314 | 6.37797904058 315 | 6.40797904058 316 | "w" 317 | 6.40797904058 318 | 6.72797904058 319 | "o" 320 | 6.72797904058 321 | 6.84797904058 322 | "b" 323 | 6.84797904058 324 | 6.92797904058 325 | "e" 326 | 6.92797904058 327 | 6.95797904058 328 | "N" 329 | 6.95797904058 330 | 7.06797904058 331 | "ky" 332 | 7.06797904058 333 | 7.10797904058 334 | "o:" 335 | 7.10797904058 336 | 7.22797904058 337 | "sh" 338 | 7.22797904058 339 | 7.25797904058 340 | "i" 341 | 7.25797904058 342 | 7.30797904058 343 | "m" 344 | 7.30797904058 345 | 7.41797904058 346 | "a" 347 | 7.41797904058 348 | 7.61797904058 349 | "s" 350 | 7.61797904058 351 | 7.67648659987 352 | "u" 353 | 7.67648659987 354 | 8.03355270487 355 | "" 356 | 8.03355270487 357 | 8.07355270487 358 | "k" 359 | 8.07355270487 360 | 8.10355270487 361 | "o" 362 | 8.10355270487 363 | 8.14355270487 364 | "n" 365 | 8.14355270487 366 | 8.22355270487 367 | "o" 368 | 8.22355270487 369 | 8.25355270487 370 | "n" 371 | 8.25355270487 372 | 8.34355270487 373 | "a" 374 | 8.34355270487 375 | 8.44355270487 376 | "ts" 377 | 8.44355270487 378 | 8.47355270487 379 | "u" 380 | 8.47355270487 381 | 8.63355270487 382 | "w" 383 | 8.63355270487 384 | 8.90355270487 385 | "a" 386 | 8.90355270487 387 | 8.99355270487 388 | "n" 389 | 8.99355270487 390 | 9.08355270487 391 | "i" 392 | 
9.08355270487 393 | 9.11355270487 394 | "p" 395 | 9.11355270487 396 | 9.18355270487 397 | "o" 398 | 9.18355270487 399 | 9.30355270487 400 | "N" 401 | 9.30355270487 402 | 9.35355270487 403 | "n" 404 | 9.35355270487 405 | 9.66355270487 406 | "i" 407 | 9.66355270487 408 | 9.74355270487 409 | "i" 410 | 9.74355270487 411 | 9.81355270487 412 | "k" 413 | 9.81355270487 414 | 9.85355270487 415 | "i" 416 | 9.85355270487 417 | 9.91355270487 418 | "m" 419 | 9.91355270487 420 | 9.96355270487 421 | "a" 422 | 9.96355270487 423 | 10.0435527049 424 | "sh" 425 | 10.0435527049 426 | 10.0735527049 427 | "i" 428 | 10.0735527049 429 | 10.1035527049 430 | "t" 431 | 10.1035527049 432 | 10.2762026369 433 | "a" 434 | 10.2762026369 435 | 10.8441923856 436 | "" 437 | 10.8441923856 438 | 10.8741923856 439 | "t" 440 | 10.8741923856 441 | 10.9741923856 442 | "o:" 443 | 10.9741923856 444 | 11.0741923856 445 | "ky" 446 | 11.0741923856 447 | 11.2241923856 448 | "o:" 449 | 11.2241923856 450 | 11.2541923856 451 | "i" 452 | 11.2541923856 453 | 11.3641923856 454 | "k" 455 | 11.3641923856 456 | 11.5241923856 457 | "i" 458 | 11.5241923856 459 | 11.5941923856 460 | "m" 461 | 11.5941923856 462 | 11.6541923856 463 | "a" 464 | 11.6541923856 465 | 11.7341923856 466 | "sh" 467 | 11.7341923856 468 | 11.7641923856 469 | "i" 470 | 11.7641923856 471 | 11.8041923856 472 | "t" 473 | 11.8041923856 474 | 12.0441923856 475 | "a" 476 | 12.0441923856 477 | 12.1441923856 478 | "m" 479 | 12.1441923856 480 | 12.5541923856 481 | "o" 482 | 12.5541923856 483 | 12.6741923856 484 | "sh" 485 | 12.6741923856 486 | 12.7041923856 487 | "i" 488 | 12.7041923856 489 | 12.8141923856 490 | "k" 491 | 12.8141923856 492 | 12.9141923856 493 | "o" 494 | 12.9141923856 495 | 12.9741923856 496 | "k" 497 | 12.9741923856 498 | 13.1041923856 499 | "u" 500 | 13.1041923856 501 | 13.1941923856 502 | "i" 503 | 13.1941923856 504 | 13.2241923856 505 | "k" 506 | 13.2241923856 507 | 13.2741923856 508 | "i" 509 | 13.2741923856 510 | 13.3141923856 511 | "m" 512 | 13.3141923856 513 | 13.3841923856 514 | "a" 515 | 13.3841923856 516 | 13.4741923856 517 | "sh" 518 | 13.4741923856 519 | 13.5041923856 520 | "i" 521 | 13.5041923856 522 | 13.5441923856 523 | "t" 524 | 13.5441923856 525 | 13.7353684085 526 | "a" 527 | 13.7353684085 528 | 14.3480198798 529 | "" 530 | 14.3480198798 531 | 14.3680198798 532 | "t" 533 | 14.3680198798 534 | 14.4280198798 535 | "a" 536 | 14.4280198798 537 | 14.4880198798 538 | "n" 539 | 14.4880198798 540 | 14.5480198798 541 | "o" 542 | 14.5480198798 543 | 14.6880198798 544 | "sh" 545 | 14.6880198798 546 | 14.8180198798 547 | "i" 548 | 14.8180198798 549 | 14.8780198798 550 | "k" 551 | 14.8780198798 552 | 14.9880198798 553 | "a" 554 | 14.9880198798 555 | 15.0180198798 556 | "t" 557 | 15.0180198798 558 | 15.186091973 559 | "a" 560 | 15.186091973 561 | 15.9461478394 562 | "" 563 | 15.9461478394 564 | 16.0461478394 565 | "r" 566 | 16.0461478394 567 | 16.1561478394 568 | "a:" 569 | 16.1561478394 570 | 16.2861478394 571 | "m" 572 | 16.2861478394 573 | 16.3261478394 574 | "e" 575 | 16.3261478394 576 | 16.6161478394 577 | "N" 578 | 16.6161478394 579 | 16.7661478394 580 | "d" 581 | 16.7661478394 582 | 16.8461478394 583 | "a" 584 | 16.8461478394 585 | 16.8961478394 586 | "i" 587 | 16.8961478394 588 | 16.9661478394 589 | "s" 590 | 16.9661478394 591 | 16.9961478394 592 | "u" 593 | 16.9961478394 594 | 17.0261478394 595 | "k" 596 | 17.0261478394 597 | 17.1161478394 598 | "i" 599 | 17.1161478394 600 | 17.1561478394 601 | "d" 602 | 17.1561478394 603 | 17.2361478394 604 | "e" 605 | 
17.2361478394 606 | 17.3961478394 607 | "s" 608 | 17.3961478394 609 | 17.4539965371 610 | "u" 611 | 17.4539965371 612 | 17.516375 613 | "" 614 | -------------------------------------------------------------------------------- /examples/files/introduction.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/examples/files/introduction.wav -------------------------------------------------------------------------------- /examples/files/tone_split_data.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 16.8040625 6 | 7 | 1 8 | "IntervalTier" 9 | "utterances" 10 | 0 11 | 16.8040625 12 | 9 13 | 0 14 | 2.144973011908906 15 | "" 16 | 2.144973011908906 17 | 3.7520176961273086 18 | "01" 19 | 3.7520176961273086 20 | 6.400665416413194 21 | "" 22 | 6.400665416413194 23 | 7.724989276556136 24 | "02" 25 | 7.724989276556136 26 | 10.090916172766562 27 | "" 28 | 10.090916172766562 29 | 11.802121160591712 30 | "03" 31 | 11.802121160591712 32 | 14.361488620643241 33 | "" 34 | 14.361488620643241 35 | 15.98341334823404 36 | "04" 37 | 15.98341334823404 38 | 16.8040625 39 | "" 40 | -------------------------------------------------------------------------------- /examples/files/tone_split_data.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/examples/files/tone_split_data.wav -------------------------------------------------------------------------------- /examples/frequency.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Apr 16, 2018 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import io 8 | from os.path import join 9 | from pyacoustics.text import frequency 10 | 11 | rootPath = r"/Users/tmahrt/Dropbox/workspace/pyAcoustics/resources" 12 | outputFN = r"/Users/tmahrt/Desktop/buckeye_frequency_counts.csv" 13 | 14 | buckeye = frequency.Buckeye(join(rootPath, "buckeye_counts.txt")) 15 | fischer = frequency.Fischer(join(rootPath, "fischer_counts.txt")) 16 | # google = frequency.GoogleUnigram(join(rootPath, "google.letter.unigram")) # Too large to include? License issue? 
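# A note on getFrequency(), used below: it returns the word's frequency data as a
# sequence (a raw count first, judging by the summing of [0] into sumV below), with
# outOfDictionaryValue standing in for words that a corpus does not contain.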
17 | switchboard = frequency.SwitchboardTim(join(rootPath, "switchboard_counts.txt")) 18 | 19 | outputList = [] 20 | wordList = list(buckeye.frequencyDict.keys()) 21 | wordList.sort() 22 | sumV = 0 23 | for word in wordList: 24 | 25 | # Not including words that were tagged for any reason 26 | if word[0] == "[": 27 | continue 28 | 29 | sumV += buckeye.getFrequency(word, outOfDictionaryValue=0)[0] 30 | 31 | row = [ 32 | word, 33 | ] 34 | for corpus in [ 35 | buckeye, 36 | fischer, 37 | # google, 38 | switchboard, 39 | ]: 40 | row.extend(corpus.getFrequency(word, outOfDictionaryValue="")) 41 | 42 | rowTxt = ",".join([str(val) for val in row]) 43 | outputList.append(rowTxt) 44 | 45 | outputTxt = u"\n".join(outputList) 46 | with io.open(outputFN, "w") as fd: 47 | fd.write(outputTxt) 48 | 49 | print(sumV) 50 | -------------------------------------------------------------------------------- /examples/split_audio_on_silence.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | 4 | import datetime 5 | 6 | now = datetime.datetime.now 7 | 8 | from praatio import pitch_and_intensity 9 | 10 | from pyacoustics.speech_detection import naive_vad 11 | from pyacoustics.signals import audio_scripts 12 | from pyacoustics.signals import data_fitting 13 | from pyacoustics.utilities import utils 14 | from pyacoustics.utilities import my_math 15 | 16 | from praatio import textgrid 17 | 18 | 19 | def audiosplitSilence( 20 | inputPath, 21 | fn, 22 | tgPath, 23 | pitchPath, 24 | subwavPath, 25 | minPitch, 26 | maxPitch, 27 | stepSize, 28 | numSteps, 29 | praatEXE, 30 | praatScriptPath, 31 | generateWavs=False, 32 | numSegmentsToExtract=None, 33 | ): 34 | """ 35 | Extract the non-silence portions of a file 36 | 37 | minPitch - the speaker's minimum pitch 38 | maxPitch - the speaker's maximum pitch 39 | intensityPercentile - (used by the commented-out alternative below) Given 40 | the distribution of intensity values in a file, the 41 | intensity threshold to use is the one that falls at 42 | /intensityPercentile/. Any intensity values less than 43 | the intensity threshold will be considered silence. 44 | I typically use a value between 0.2 and 0.3. 45 | stepSize - non-overlapping step size (in seconds) 46 | numSteps - number of consecutive blocks needed for a segment to be 47 | considered silence 48 | stepSize * numSteps is the smallest possible interval that 49 | can be considered silence/not-silence. 50 | praatEXE - full path to a praat executable. On Windows use praatcon.exe. 51 | Other systems use praat 52 | praatScriptPath - location of the folder containing praat scripts that 53 | is distributed with pyAcoustics 54 | numSegmentsToExtract - if not None remove all but the X loudest segments as 55 | specified by /numSegmentsToExtract/. Otherwise, 56 | all non-silent segments are kept.
57 | generateWavs - if False, no wavefiles are extracted, but you can look at 58 | the generated textgrids to see which wavefiles would have 59 | been extracted 60 | """ 61 | utils.makeDir(tgPath) 62 | utils.makeDir(pitchPath) 63 | utils.makeDir(subwavPath) 64 | 65 | name = os.path.splitext(fn)[0] 66 | 67 | piSamplingRate = 100 # Samples per second 68 | sampleStep = 1 / float(piSamplingRate) 69 | outputFN = os.path.splitext(fn)[0] + ".txt" 70 | motherPIList = pitch_and_intensity.extractPI( 71 | join(inputPath, fn), 72 | join(pitchPath, outputFN), 73 | praatEXE, 74 | minPitch, 75 | maxPitch, 76 | sampleStep=sampleStep, 77 | forceRegenerate=False, 78 | ) 79 | 80 | # entry = (time, pitchVal, intVal) 81 | motherPIList = [float(entry[2]) for entry in motherPIList] 82 | 83 | # We need the intensity threshold to distinguish silence from speech/noise 84 | # Naively, we can extract this by getting the nth percent most intense 85 | # sound in the file naive_vad.getIntensityPercentile() 86 | # (but then, how do we determine the percent?) 87 | # Alternatively, we could consider the set of intensity values to be 88 | # bimodal -- silent values vs non-silent. The best threshold is the one 89 | # that minimizes the overlap between the two distributions, obtained via 90 | # data_fitting.getBimodalValley() 91 | # silenceThreshold = naive_vad.getIntensityPercentile(motherPIList, 92 | # intensityPercentile) 93 | silenceThreshold = data_fitting.getBimodalValley(motherPIList, doplot=True) 94 | print(silenceThreshold) 95 | entryList = naive_vad.naiveVAD( 96 | motherPIList, silenceThreshold, piSamplingRate, stepSize, numSteps 97 | ) 98 | entryList = [(time[0], time[1], str(i)) for i, time in enumerate(entryList)] 99 | 100 | # Filter out quieter sounds if necessary 101 | if numSegmentsToExtract is not None: 102 | # Get the rms energy of each non-silent region 103 | rmsEntryList = [] 104 | for i, entry in enumerate(entryList): 105 | intList = motherPIList[ 106 | int(entry[0] * piSamplingRate) : int(entry[1] * piSamplingRate) 107 | ] 108 | 109 | rmsVal = my_math.rms(intList) 110 | rmsEntryList.append((rmsVal, entry)) 111 | 112 | rmsEntryList.sort(reverse=True) # Sort by energy, loudest first 113 | entryList = [rmsTuple[1] for rmsTuple in rmsEntryList[:numSegmentsToExtract]] 114 | entryList.sort() # Sort by time 115 | 116 | # Create the textgrid 117 | tg = textgrid.Textgrid() 118 | duration = audio_scripts.getSoundFileDuration(join(inputPath, fn)) 119 | tier = textgrid.IntervalTier("speech_tier", entryList, 0, duration) 120 | tg.addTier(tier) 121 | tg.save( 122 | join(tgPath, name + ".TextGrid"), 123 | format="short_textgrid", 124 | includeBlankSpaces=True, 125 | ) 126 | 127 | if generateWavs is True: 128 | for i, entry in enumerate(entryList): 129 | subwavOutputFN = join(subwavPath, name + "_" + str(i) + ".wav") 130 | audio_scripts.extractSubwav( 131 | join(inputPath, fn), 132 | subwavOutputFN, 133 | entry[0], 134 | entry[1], 135 | singleChannelFlag=True, 136 | ) 137 | 138 | 139 | if __name__ == "__main__": 140 | _minPitch = 50 141 | _maxPitch = 450 142 | _intensityPercentile = 0.3 143 | _stepSize = 0.1 144 | _numSteps = 5 145 | 146 | _fn = "introduction.wav" 147 | _dataPath = join("/Users/tmahrt/Dropbox/workspace/pyAcoustics/examples/files") 148 | _outputPath = join(_dataPath, "output_stepSize_0.1") 149 | _tgPath = join(_dataPath, "splitAudio_silence_stepSize_0.1") 150 | _pitchPath = join(_dataPath, "pitch") 151 | _wavOutputPath = join(_dataPath, "output_wavs") 152 | _praatEXE = "/Applications/praat.App/Contents/MacOS/Praat" 153 |
_praatScriptPath = "/Users/tmahrt/Dropbox/workspace/pyAcoustics/" "praatScripts" 154 | utils.makeDir(_wavOutputPath) 155 | _rootFolderName = os.path.splitext(os.path.split(_fn)[1])[0] 156 | _subwavOutputPath = join(_wavOutputPath, _rootFolderName) 157 | audiosplitSilence( 158 | _dataPath, 159 | _fn, 160 | _tgPath, 161 | _pitchPath, 162 | _subwavOutputPath, 163 | _minPitch, 164 | _maxPitch, 165 | _stepSize, 166 | _numSteps, 167 | _praatEXE, 168 | _praatScriptPath, 169 | ) 170 | 171 | # Changing the parameters used in silence detection can lead to 172 | # very different results 173 | _stepSize = 0.025 174 | _numSteps = 10 175 | _tgPath = join(_dataPath, "splitAudio_silence_stepSize_0.025") 176 | audiosplitSilence( 177 | _dataPath, 178 | _fn, 179 | _tgPath, 180 | _pitchPath, 181 | _subwavOutputPath, 182 | _minPitch, 183 | _maxPitch, 184 | _stepSize, 185 | _numSteps, 186 | _praatEXE, 187 | _praatScriptPath, 188 | ) 189 | -------------------------------------------------------------------------------- /examples/split_audio_on_tone.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | 4 | from praatio import textgrid 5 | from praatio import pitch_and_intensity 6 | 7 | from pyacoustics.speech_detection import split_on_tone 8 | from pyacoustics.utilities import utils 9 | from pyacoustics.signals import audio_scripts 10 | 11 | 12 | def audiosplitOnTone( 13 | inputPath, 14 | fn, 15 | pitchPath, 16 | tgPath, 17 | subwavPath, 18 | minPitch, 19 | maxPitch, 20 | toneFrequency, 21 | minEventDuration, 22 | praatEXE, 23 | praatScriptPath, 24 | forceRegen, 25 | generateWavs=False, 26 | ): 27 | utils.makeDir(pitchPath) 28 | utils.makeDir(tgPath) 29 | utils.makeDir(subwavPath) 30 | 31 | name = os.path.splitext(fn)[0] 32 | piSamplingRate = 100 # Samples per second 33 | 34 | # Extract pitch and find patterns in the file 35 | outputFN = os.path.splitext(fn)[0] + ".txt" 36 | sampleStep = 1 / float(piSamplingRate) 37 | motherPIList = pitch_and_intensity.extractPI( 38 | join(inputPath, fn), 39 | join(pitchPath, outputFN), 40 | praatEXE, 41 | minPitch, 42 | maxPitch, 43 | sampleStep=sampleStep, 44 | forceRegenerate=forceRegen, 45 | undefinedValue=0.0, 46 | ) 47 | # entry = (time, pitchVal, intVal) 48 | pitchList = [float(entry[1]) for entry in motherPIList] 49 | timeDict = split_on_tone.splitFileOnTone( 50 | pitchList, piSamplingRate, toneFrequency, minEventDuration 51 | ) 52 | 53 | # Output result as textgrid 54 | duration = audio_scripts.getSoundFileDuration(join(inputPath, fn)) 55 | tg = textgrid.Textgrid() 56 | for key in ["beep", "speech", "silence"]: 57 | entryList = timeDict[key] 58 | tier = textgrid.IntervalTier(key, entryList, 0, duration) 59 | tg.addTier(tier) 60 | tg.save( 61 | join(tgPath, name + ".TextGrid"), 62 | format="short_textgrid", 63 | includeBlankSpaces=True, 64 | ) 65 | 66 | # Output audio portions between tones 67 | if generateWavs: 68 | split_on_tone.extractSubwavs(timeDict, inputPath, fn, subwavPath) 69 | 70 | 71 | if __name__ == "__main__": 72 | _dataPath = "/Users/tmahrt/Dropbox/workspace/pyAcoustics/examples/files" 73 | _pitchPath = join(_dataPath, "split_on_tone_pitch") 74 | _tgPath = join(_dataPath, "split_on_tone_textgrids") 75 | _wavOutputPath = join(_dataPath, "split_on_tone_subwavs") 76 | _fn = "tone_split_data.wav" 77 | _minPitch = 50 78 | _maxPitch = 450 79 | _toneFrequency = 330 # Actual frequency is 333 80 | _minEventDuration = 0.2 81 | _forceRegeneratePitch = False 82 | _generateWavs = True 83 | 84 | 
_praatEXE = "/Applications/praat.App/Contents/MacOS/Praat" 85 | _praatScriptPath = "/Users/tmahrt/Dropbox/workspace/pyAcoustics/" "praatScripts" 86 | 87 | audiosplitOnTone( 88 | _dataPath, 89 | _fn, 90 | _pitchPath, 91 | _tgPath, 92 | _wavOutputPath, 93 | _minPitch, 94 | _maxPitch, 95 | _toneFrequency, 96 | _minEventDuration, 97 | _praatEXE, 98 | _praatScriptPath, 99 | _forceRegeneratePitch, 100 | _generateWavs, 101 | ) 102 | 103 | # Let's try the same code with an incorrect tone frequency 104 | _toneFrequency = 500 105 | _tgPath = join(_dataPath, "split_on_tone_textgrids_500hz_tone") 106 | _generateWavs = False 107 | 108 | audiosplitOnTone( 109 | _dataPath, 110 | _fn, 111 | _pitchPath, 112 | _tgPath, 113 | _wavOutputPath, 114 | _minPitch, 115 | _maxPitch, 116 | _toneFrequency, 117 | _minEventDuration, 118 | _praatEXE, 119 | _praatScriptPath, 120 | _forceRegeneratePitch, 121 | _generateWavs, 122 | ) 123 | -------------------------------------------------------------------------------- /matlabScripts/detect_syllable_nuclei.m: -------------------------------------------------------------------------------- 1 | % Bootstrap script for Uwe Reichel's nucleus detection. Written by Tim Mahrt 2 | function[] = detect_syllable_nuclei(path_to_files, output_path) 3 | 4 | files = dir(fullfile(path_to_files,'*.wav')); 5 | for file = files' 6 | [tossPath, name, tossExt] = fileparts(file.name); 7 | 8 | [y fs] = audioread(fullfile(path_to_files, file.name)); 9 | opt.fs = fs; 10 | opt.verbose = 0; 11 | sn = fu_sylncl(y,opt); 12 | 13 | sn = sn ./ fs; % Get the timestamps in seconds 14 | 15 | output_fn = fullfile(output_path,strcat(name,'.txt')); 16 | fd = fopen(output_fn,'w'); 17 | fprintf(fd,'%f\n',sn); 18 | fclose(fd); 19 | end 20 | 21 | end 22 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/column2rowvec.m: -------------------------------------------------------------------------------- 1 | function v=column2rowvec(v) 2 | 3 | % transposition in case input is column vector 4 | 5 | if length(v(1,:))==1 6 | v=v'; 7 | end 8 | 9 | return 10 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_filter.m: -------------------------------------------------------------------------------- 1 | function sflt=fu_filter(s,t,gf,fs,o); 2 | 3 | %sflt=fu_filter(s,t,gf,fs); 4 | %s: signal vector 5 | %t: type 'high'|'low'|'stop'|'band' 6 | %gf: cutoff frequencies (1 value --> high- or low-pass, 2 values --> band-pass) 7 | %fs: sample frequency 8 | %o: order, default 5 9 | %applies butter filter 10 | %operates only if gf < fs/2 11 | 12 | fn=gf/(fs/2); 13 | 14 | if fn>=1 15 | sflt=s; 16 | return 17 | end 18 | 19 | if nargin < 5; o=5; end 20 | 21 | if strcmp(t,'band') 22 | [b a]=butter(o,fn); 23 | else 24 | [b a]=butter(o, fn, t); 25 | end 26 | 27 | sflt=filtfilt(b,a,s); 28 | 29 | if length(find(isnan(sflt)))>0 30 | disp('filtering not possible, returning original signal'); 31 | sflt=s; 32 | end 33 | 34 | %freqz(b,a,128,fs); 35 | %subplot(2,1,1) 36 | %x=32000:32000+fs; 37 | %plot(x,s(x),'-b') 38 | %subplot(2,1,2) 39 | %plot(x,sflt(x),'-b') 40 | %a=300000; 41 | %fhpt_play(sflt*a); 42 | 43 | return 44 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_i_window.m: -------------------------------------------------------------------------------- 1 | function wi = fu_i_window(i,wl,l) 2 | 3 | % wi = fu_i_window(i,wl,l) 4 | %
i: index in vector 5 | % wl: window length 6 | % l: vector length 7 | % wi: indices in window around i 8 | % - returns indices of window around index i in vector of length l 9 | % - if distance from i to end or beginning of vector is less than wl/2, 10 | % the window is shifted accordingly 11 | 12 | hwl=floor(wl/2); 13 | wi=max(i-hwl,1):min(i+hwl,l); 14 | 15 | % if window too short: trying to lengthen window to wanted size 16 | d=wl-length(wi); 17 | if d>0 18 | if wi(1)>1 19 | o=max(wi(1)-d,1); 20 | wi=o:wi(end); 21 | d=wl-length(wi); 22 | end 23 | if d>0 24 | if wi(end)<l 25 | wi=wi(1):min(wi(end)+d,l); 26 | end 27 | end 28 | end 29 | 30 | return 31 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_locmax.m: -------------------------------------------------------------------------------- 1 | function [pks idx]=fu_locmax(y,opt) 2 | 3 | %[pks idx]=fu_locmax(y,opt) 4 | %y: vector 5 | %opt: structure with fields 6 | % .smooth: 7 | % --> smoothing options, see fu_smooth 8 | % .mtd <'none'> 9 | % .order <1> 10 | % .peak.mph: <-Inf> min peak height 11 | % .th: <0> threshold; min difference of local peak to neighbors 12 | % .mpd: <1> min peak distance 13 | % .verbose.plot: <0>|1 14 | % .bw: <0>|1 15 | %pks: peak values 16 | %idx: their positions [sample] 17 | 18 | %% init 19 | if nargin<2; opt=struct; end 20 | 21 | opt=fu_optstruct_init(opt,{'smooth' 'peak'},{struct struct}); 22 | opt.smooth=fu_optstruct_init(opt.smooth,{'win' 'mtd' 'order'},{1 'none' 1}); 23 | opt.peak=fu_optstruct_init(opt.peak,{'mph' 'th' 'mpd'},{-Inf 0 1}); 24 | 25 | %% locmax 26 | opt.peak.mpd=min(opt.peak.mpd,length(y)-1); 27 | 28 | [pks idx] = findpeaks(fu_smooth(y,opt.smooth),... 29 | 'MINPEAKDISTANCE',opt.peak.mpd,'MINPEAKHEIGHT',opt.peak.mph,... 30 | 'THRESHOLD',opt.peak.th); 31 | 32 | % fallback 33 | if length(pks)==0 34 | [pks idx] = findpeaks(y); 35 | end 36 | 37 | % transpose to column vector since in 7.10.0 findpeaks() always returns 38 | % row vector! 39 | if size(y,2)==1 40 | pks=fu_r2c(pks); 41 | idx=fu_r2c(idx); 42 | end 43 | 44 | return 45 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_optstruct_init.m: -------------------------------------------------------------------------------- 1 | function opt = fu_optstruct_init(opt,optfields,optdefaults) 2 | 3 | %opt = fu_optstruct_init(opt,optfields,optdefaults) 4 | %initialisation of option structure OPT 5 | %assigns each field given in cell array OPTFIELDS with corresponding 6 | %default value given in cell array OPTDEFAULTS, whenever field is not 7 | %yet specified 8 | %if OPTDEFAULTS{i} is 'oblig', then optfields{i} must already have been set 9 | %by the user. If not, an error is raised.
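% example (field names are arbitrary):
%   opt = struct('fs',16000);
%   opt = fu_optstruct_init(opt,{'fs' 'verbose'},{8000 0});
%   --> opt.fs stays 16000 (already set by the user), opt.verbose becomes 0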
10 | 11 | for n=1:length(optfields) 12 | if ~isfield(opt,optfields{n}) 13 | if (~isnumeric(optdefaults{n}) & strcmp(optdefaults{n},'oblig')) 14 | error(sprintf('opt field "%s" has to be defined by the user!',optfields{n})); 15 | end 16 | opt=setfield(opt,optfields{n},optdefaults{n}); 17 | end 18 | end 19 | 20 | return 21 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_pause_detector.m: -------------------------------------------------------------------------------- 1 | function t = fu_pause_detector(s,opt); 2 | 3 | % t = fu_pause_detector(s,opt); 4 | % looks for pauses in signal according to criteria 5 | % specified in opt 6 | % input: s - signal vector 7 | % opt - structure with fields 8 | % .length: minimum length of pause in s 9 | % .rlength: length of reference window in s 10 | % .f_thresh: threshold factor (*rmse(reference_window)) 11 | % .fs: sample rate 12 | % .ret: <'s'>|'smpl' return values in seconds or samples 13 | % default (optimised on IMS radio news corpus, read speech, 14 | % by fminunc()): 15 | % opt.length = 0.1524; 16 | % opt.f_thresh = 0.0767; 17 | % opt.rlength = 5; 18 | % opt.fs = 16000; 19 | % output: t - matrix of pause time on- and offsets (in s) 20 | % algorithm: 21 | % - preprocessing: removing DC, low pass filtering (10kHz) 22 | % - window y with opt.length sec is moved over signal with stepsize 23 | % 0.05 s 24 | % - reference window rw with opt.rlength sec centered on y midpoint 25 | % is moved in parallel 26 | % - if rmse(rw) < rmse(global_signal)*opt.f_thresh 27 | % rw is set to global_signal (long pause assumed) 28 | % - if rmse(y) < rmse(rw)*opt.f_thresh 29 | % y is considered as a pause 30 | % Uwe Reichel, IPS (2009) 31 | 32 | 33 | %%%% defaults %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 34 | if nargin==1; opt=struct; end 35 | ofld={'f_thresh' 'length' 'rlength' 'fs' 'ret'}; 36 | odef={0.0767 0.1524 5 16000 's'}; 37 | opt=fu_optstruct_init(opt,ofld,odef); 38 | 39 | 40 | %%%% preprocessing %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 41 | % stereo->mono, mean 0 42 | s = s(:,1)-mean(s(:,1)); 43 | % low pass filtering (just carried out if fs > 20kHz) 44 | s = fu_filter(s,'low',10000,opt.fs); 45 | 46 | 47 | %%%% settings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 48 | % reference window span 49 | rws = floor(opt.rlength*opt.fs); 50 | % signal length 51 | ls=length(s); 52 | % min pause length in samples 53 | ml=floor(opt.length*opt.fs); 54 | % global rmse and pause threshold 55 | e_glob = fu_rmse(s); 56 | t_glob = opt.f_thresh*e_glob; 57 | % stepsize 58 | %sts=floor(ml/4); 59 | sts=max(1,floor(0.05*opt.fs)); 60 | stsh=floor(sts/2); % for centering of reference window 61 | 62 | 63 | %%%% pause detection %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 64 | % output array collecting pause sample indices 65 | t=[]; 66 | j=1; 67 | 68 | 69 | for i=1:sts:ls 70 | %%%% window %%%%%%%%%%%%%%%%%%%%%%%%%%%%% 71 | yi=i:min(ls,i+ml-1); 72 | %tt=[yi(1) yi(end)] 73 | y=s(yi); 74 | e_y = fu_rmse(y); 75 | %%%% reference window %%%%%%%%%%%%%%%%%%% 76 | rw=s(fu_i_window(min(i+stsh,ls),rws,ls)); 77 | e_rw=fu_rmse(rw); 78 | if (e_rw <= t_glob); e_rw=e_glob; end 79 | %%%% if rmse in window below threshold %% 80 | if e_y <= e_rw*opt.f_thresh 81 | if size(t,1)==j 82 | % values belong to already detected pause 83 | if yi(1) < t(j,2) 84 | t(j,2)=yi(end); 85 | else % new pause 86 | j=j+1; 87 | t(j,:)=[yi(1) yi(end)]; 88 | end 89 | else % new pause 90 | t(j,:)=[yi(1) yi(end)]; 91 | end 92 | end 93 | end 94 | 95 | 96 | %%%%%%
conversion of sample indices into %%%%%%%%%%%%%% 97 | %%%%%% time on- and offset values (sec) %%%%%%%%%%%%%%% 98 | 99 | if strcmp(opt.ret,'s'); t=t./opt.fs; end 100 | 101 | return 102 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_r2c.m: -------------------------------------------------------------------------------- 1 | function [v t]=fu_r2c(v) 2 | 3 | %v=fu_r2c(v) 4 | %[v t]=fu_r2c(v) 5 | %if V is a ROW VECTOR, it is transposed and T is set to 1 6 | %needed for uniform vector/matrix treatment in functions 7 | %operating on column vectors 8 | %see also fu_c2r, fu_transpose 9 | 10 | tb=0; 11 | % transpose row vector 12 | if size(v,1)==1 13 | v=v'; 14 | tb=1; 15 | end 16 | 17 | if nargout==2 18 | t=tb; 19 | end 20 | 21 | return 22 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_rmse.m: -------------------------------------------------------------------------------- 1 | function e = fu_rmse(x,y) 2 | 3 | %e = fu_rmse(x) 4 | %e = fu_rmse(x,y) 5 | %returns root mean squared error E between vector X and 0-line 6 | %or root mean squared error E between vectors X and Y 7 | 8 | if nargin < 2 9 | e=sqrt(sum(x.^2)/length(x)); 10 | else 11 | e=sqrt(sum((x-y).^2)/length(x)); 12 | end 13 | 14 | return 15 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_smooth.m: -------------------------------------------------------------------------------- 1 | function ys=fu_smooth(y,opt) 2 | 3 | %ys=fu_smooth(y,opt) 4 | %bracket for smoothing 5 | %faster but less flexible than fu_smoothing 6 | %y: vector 7 | % opt.mtd % as in the smooth() function (+ 'none') 8 | % .wl % window length 9 | % .order % polynomial order for sgolay 10 | 11 | 12 | if nargin<2; opt=struct; end 13 | opt=fu_optstruct_init(opt,{'mtd' 'wl' 'order'},{'mova' 5 3}); 14 | 15 | if strcmp(opt.mtd,'none') 16 | ys=y; 17 | elseif ~strcmp(opt.mtd,'sgolay') 18 | ys=smooth(y,opt.wl,opt.mtd); 19 | else 20 | ys=smooth(y,opt.wl,opt.mtd,opt.order); 21 | end 22 | 23 | return 24 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_smooth_binvec.m: -------------------------------------------------------------------------------- 1 | function vs = fu_smooth_binvec(v,l); 2 | 3 | % vs = fu_smooth_binvec(v,l); 4 | % v: binary vector 5 | % l: minimum length of 1- or 0-subsequences 6 | % vs: smoothed vector (short subsequences get same value as neighbors) 7 | % e.g.
v = [1 1 1 1 0 0 1 1 1 1]; 8 | % l = 3 9 | % --> vs = [1 1 1 1 1 1 1 1 1 1]; 10 | 11 | [vs tt] = fu_transp(v,'r'); 12 | 13 | vs = fu_smooth_binvec_sub(vs,1,l); 14 | vs = fu_smooth_binvec_sub(vs,0,l); 15 | 16 | vs=fu_transp(vs,tt); 17 | 18 | return 19 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_smooth_binvec_sub.m: -------------------------------------------------------------------------------- 1 | function v=fu_smooth_binvec_sub(v,b,l); 2 | 3 | %called by fu_smooth_binvec 4 | 5 | r = abs(b-1); 6 | i = find(v==b); 7 | if length(i)==0; return; end 8 | di = [1 diff(i)]; 9 | seq_i=[]; 10 | for j=1:length(di) 11 | if di(j)>1 12 | if length(seq_i) < l 13 | v(seq_i)=r; 14 | end 15 | seq_i=[]; 16 | end 17 | seq_i=[seq_i i(j)]; 18 | end 19 | 20 | % last seq 21 | if length(seq_i) < l; v(seq_i)=r; end 22 | 23 | return 24 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_sylbnd.m: -------------------------------------------------------------------------------- 1 | function sb = fu_sylbnd(s,sn,opt) 2 | 3 | %sb = fu_sylbnd(s,sn,opt) 4 | %called in fu_sylncl 5 | %s: signal vector 6 | %sn: vector with detected nucleus samples (by fu_sylncl_sub) 7 | %opt: as provided for fu_sylncl 8 | 9 | % window length for energy calculation in samples 10 | ml=floor(opt.length*opt.fs); 11 | % stepsize 12 | sts=max(1,floor(0.03*opt.fs)); 13 | 14 | sb=[]; 15 | for i=1:length(sn)-1; % for all adjacent syl ncl 16 | on=sn(i); 17 | off=sn(i+1); 18 | sw = s(on:off); 19 | ls = length(sw); 20 | all_i=[]; 21 | all_e=[]; 22 | for j=1:sts:length(sw) % for all windows within ncl pair 23 | yi=fu_i_window(j,ml,ls); 24 | y = sw(yi); 25 | e_y = fu_rmse(y); 26 | all_i=[all_i j]; 27 | all_e=[all_e e_y]; 28 | end 29 | [ymin ymini] = min(all_e); 30 | sb = [sb; on+all_i(ymini(1))]; 31 | end 32 | 33 | return 34 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_sylncl.m: -------------------------------------------------------------------------------- 1 | function [sn sb] = fu_sylncl(s,opt); 2 | 3 | % sn = fu_sylncl(s,opt); 4 | % [sn sb] = fu_sylncl(s,opt); 5 | % opt.do='apply': 6 | % default case. 7 | % returns vector sn of syllable nucleus samples in speech signal s 8 | % given opt structure with fields as specified in training output below. 9 | % Optionally, sb, a vector of syllable boundary samples is returned 10 | % (simply the sample minimum energy between two adjacent nuclei) 11 | % opt.do='train': 12 | % .ref: sample reference 13 | % .fs: sample frequency 14 | % .errtype: <'f'>|'io'|'mae' 15 | % error type: 1-fscore (best choice) 16 | % n_ins+n_omis (used in diss) 17 | % 1-MAE (after alignment) 18 | % returns structure SN to be used as OPT in 'apply' case 19 | % .f_thresh: energy threshold factor 20 | % .bf: lower and upper boundary frequencies for band pass filtering 21 | % .do: 'apply' 22 | % .fs: sample frequency of input signal 23 | % .e_min: minimum needed proportion of max energy 24 | % .length: length of energy window in s 25 | % .rlength: length of reference energy window (>length) in s 26 | % .md: min distance between subsequent nuclei in s (set to 0 if to be 27 | % neglected) 28 | % .nouse_int: <[]>; n x 2 matrix [on off] of intervals not to be used 29 | % (e.g. pause intervals). In samples! E.g. output of 30 | % fu_pause_detector (with opt.ret='smpl'). 
Additionally, 31 | % 0-output of fu_voicing (to be transformed for compatibility) can 32 | % be used. Both can also be called inline setting .do_nouse>0 33 | % .do_nouse: <0>|1|2|3: create or enlarge .nouse_int matrix by 34 | % finding pauses and/or voiceless utterance parts 35 | % <0> - do nothing 36 | % 1 - detect pauses and voiceless utterance parts 37 | % 2 - pause only 38 | % 3 - voiceless utterance parts only 39 | % .verbose: plot signal and nuclei 40 | % 41 | % -- exclude pause and voiceless intervals from analysis? 42 | % opt.pau.do=<'apply'>|'skip': preceding pause detection 43 | % .* see matlab_lib/fu_pause_detector.m 44 | % opt.voi.do=<'apply'>|'skip': preceding voicing detection 45 | % .*: see fu_voicing.m 46 | % 47 | % minimal application example: 48 | % [y fs] = wavread('myaudio.wav'); 49 | % opt.fs = fs; 50 | % opt.verbose = 1; 51 | % [sn sb] = fu_sylncl(y,opt); 52 | 53 | global s_glob; 54 | global opt_glob; 55 | close all 56 | 57 | if nargin==1; opt=struct; end 58 | opt=fu_optstruct_init(opt,{'do' 'nouse_int' 'do_nouse' 'errtype'},{'apply' [] 2 'f'}); 59 | ofld={'do' 'bf' 'f_thresh' 'length' 'rlength' 'md' 'e_min' 'fs' ... 60 | 'verbose' 'pau' 'unv'}; 61 | 62 | 63 | % preprocessing -> defining intervals not usable for syllable nuclei 64 | % matrix, rows: on- and offset in samples 65 | %opt.nouse_int = fu_sylncl_no_use_intervals(s,opt); 66 | opt.nouse_int = []; 67 | 68 | if strcmp(opt.do,'apply') %%%%%% apply %%%%%%%%% 69 | %fscore optimised on si1000p reference data 70 | odef={'apply' [212.5509 3967.1] 1.0681 0.0776 0.1491 0.1 0.1571 16000 0 struct struct}; 71 | opt=fu_optstruct_init(opt,ofld,odef); 72 | opt.pau = fu_optstruct_init(opt.pau, {'fs' 'ret'}, {opt.fs 'smpl'}); 73 | opt.unv = fu_optstruct_init(opt.unv, {'sts'}, {1}); 74 | sn=fu_sylncl_sub(s,opt); 75 | % add syl boundaries 76 | if nargout>1 77 | sb=fu_sylbnd(s,sn,opt); 78 | end 79 | else %%%%%% train %%%%%%%%% 80 | s_glob=s; 81 | opt_glob=opt; 82 | %o_opt=optimset(@fminunc); 83 | o_opt=optimset(@fminsearch); 84 | o_opt=optimset('LargeScale','on'); 85 | % [f_lowbnd/100 f_upbndf/1000 threshold_factor ncl_length ref_length 86 | % minimum_rms] 87 | w0=[2.3 2.9 1.06 0.08 0.14 0.16]; 88 | 89 | %[w fval ef o]=fminunc(@fu_sylncl_err,w0,o_opt); 90 | [w fval ef o]=fminsearch(@fu_sylncl_err,w0,o_opt); 91 | opt=fu_optstruct_init(opt,ofld,{'apply' [w(1)*100 w(2)*1000] ... 92 | w(3) w(4) w(5) w(6) opt.fs 1}); 93 | sn=fu_sylncl_sub(s,opt); 94 | end 95 | 96 | if opt.verbose==1 97 | %[sn [sb; NaN]] 98 | %t=[1:length(s)]./opt.fs; 99 | t=[1:length(s)]; 100 | plot(t,s); 101 | hold on 102 | %if isfield(opt,'ref') 103 | % for i=opt.ref; plot([i i],[-1 1],'-g'); end 104 | %end 105 | for i=sn; plot([i i],[-1 1],'-r'); end 106 | if nargout>1 107 | for i=sb; plot([i i],[-1 1],'-g'); end 108 | end 109 | end 110 | 111 | if strcmp(opt.do,'train') 112 | opt.do='apply'; 113 | opt.error=fval; 114 | sn=opt; 115 | sn_opt=opt; 116 | save('sn_opt','sn_opt'); 117 | end 118 | 119 | return 120 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_sylncl_sub.m: -------------------------------------------------------------------------------- 1 | function t=fu_sylncl_sub(s,opt); 2 | 3 | % returns samples of syllable nuclei given signal S and processing 4 | % options OPT (see fu_sylncl for details) 5 | % called by fu_sylncl 6 | 7 | % recall higher before 2nd nucl splitting. why???
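% A 'train'-mode counterpart to the minimal 'apply' example in fu_sylncl
% above (a sketch only; assumes a hand-labelled vector ref of reference
% nucleus samples, as described under opt.do='train'):
%   opt.do = 'train'; opt.fs = fs; opt.ref = ref;
%   trainedOpt = fu_sylncl(y, opt); % returns an opt struct reusable for 'apply'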
8 | 9 | %% settings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 10 | % reference window span 11 | rws = floor(opt.rlength*opt.fs); 12 | % signal length 13 | ls=length(s); 14 | % window length for energy calculation in samples 15 | ml=floor(opt.length*opt.fs); 16 | % minimum distance between subsequent nuclei in samples 17 | md=floor(opt.md*opt.fs); 18 | % stepsize 19 | sts=max(1,floor(0.03*opt.fs)); 20 | stsh=floor(sts/2); % for centering of reference window 21 | 22 | %% no use intervals (pause, voiceless) %%%%%%%%%%%%%%%%%%%% 23 | % -> vector of all samples not to be used 24 | t_nou_init = []; 25 | t_nou_pau=[]; 26 | voi=[]; 27 | t_nou=[]; 28 | if isfield(opt,'nouse_int') 29 | t_nou_init = opt.nouse_int; 30 | end 31 | if opt.do_nouse>0 32 | if opt.do_nouse < 3 33 | t_nou_pau = fu_pause_detector(s,opt.pau); 34 | end 35 | if (opt.do_nouse==1 | opt.do_nouse==3) 36 | [voi zrr] = fu_voicing(s,opt.fs,opt.unv); 37 | end 38 | end 39 | for i=1:size(t_nou_init,1) 40 | t_nou=[t_nou t_nou_init(i,1):t_nou_init(i,2)]; 41 | end 42 | for i=1:size(t_nou_pau,1) 43 | t_nou=[t_nou t_nou_pau(i,1):t_nou_pau(i,2)]; 44 | end 45 | t_nou=unique([t_nou find(voi==0)']); 46 | 47 | 48 | %%%% filtering %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 49 | if length(opt.bf)==1; ft='low'; 50 | else; ft='band'; end 51 | 52 | ord=5; % filter order, the higher the steeper, but incapable to filter 53 | % narrow bands 54 | s=fu_filter(s,ft,opt.bf,opt.fs,ord); 55 | 56 | %%%% settings 2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 57 | % minimum energy as portion of maximum energy found 58 | e_y=[]; 59 | for i=1:sts:ls 60 | %%%% window %%%%%%%%%%%%%%%%%%%%%%%%%%%%% 61 | yi=i:min(ls,i+ml-1); 62 | y=s(yi); 63 | e_y = [e_y fu_rmse(y)]; 64 | end 65 | 66 | e_min=opt.e_min*max(e_y); 67 | mey=max(e_y); 68 | 69 | 70 | % output vector collecting nucleus sample indices 71 | t=[]; 72 | 73 | all_i=[]; 74 | all_e=[]; 75 | all_r=[]; 76 | 77 | 78 | for i=1:sts:ls 79 | yi=fu_i_window(i,ml,ls); 80 | y=s(yi); 81 | e_y = fu_rmse(y); 82 | rwi = fu_i_window(i,rws,ls); 83 | rw = s(rwi); 84 | e_rw=fu_rmse(rw); 85 | all_i=[all_i i]; 86 | all_e=[all_e e_y]; 87 | all_r=[all_r e_rw]; 88 | end 89 | 90 | lmopt=struct; 91 | 92 | lmopt.peak.mpd = floor(opt.fs*opt.md/sts); 93 | [pks idx] = fu_locmax(all_e,lmopt); 94 | t=[]; 95 | for i=idx 96 | if (all_e(i) >= all_r(i)*opt.f_thresh & all_e(i) > e_min) 97 | if length(find(t_nou==all_i(i)))==0 98 | t=[t; all_i(i)]; 99 | end 100 | end 101 | end 102 | 103 | 104 | 105 | return 106 | 107 | 108 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_transp.m: -------------------------------------------------------------------------------- 1 | function [xt done] = fu_transp(x,do) 2 | 3 | % xt = fu_transp(x,do) 4 | % [xt done] = fu_transp(x,do) 5 | % x: vector 6 | % do: <'t'>|'r'|'c'|'i' - transpose, make row, make column, ignore 7 | % 'r' and 'c' just make sense for vectors!! 8 | % input 1|0 is mapped to 't'|'i' for backward compatibility 9 | % xt: x +/- transposed 10 | % done: 't' if transformation was carried out, else 'i' (for consistent 11 | % reapplication of fu_transpose_vec) 12 | % Of use e.g. 
if a function would need a column vector as input without 13 | % bothering the user and returning a vector in the same format as the input 14 | % See example in fu_smooth_binvec.m 15 | 16 | if nargin<2; do='t'; end 17 | if isnumeric(do) 18 | if do==1; do='t'; 19 | else do='i'; 20 | end 21 | end 22 | 23 | dun='i'; 24 | xt=x; 25 | 26 | if strcmp(do,'t') 27 | xt=x'; 28 | dun=do; 29 | elseif ~strcmp(do,'i') 30 | s=size(x); 31 | if min(s) > 1 32 | disp('Transposition just applicable for vectors. Done nothing.'); 33 | else 34 | if ((strcmp(do,'r') && s(2)==1) || (strcmp(do,'c') && s(1)==1)) 35 | xt=x'; 36 | dun='t'; 37 | end 38 | end 39 | end 40 | 41 | if nargout > 1; done=dun; end 42 | 43 | return 44 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_trim_vec.m: -------------------------------------------------------------------------------- 1 | function vt = fu_trim_vec(v,w,a); 2 | 3 | % vt=fu_trim_vec(v,w,a); 4 | % pops vector V or pushes scalar/vector A to V until size of V is equal to 5 | % size of W 6 | 7 | vt=column2rowvec(v); 8 | while length(vt)<length(w); vt=[vt a]; end 9 | vt=vt(1:length(w)); 10 | 11 | return 12 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_voicing.m: -------------------------------------------------------------------------------- 1 | function [voi zrr] = fu_voicing(y,sr,opt); 2 | 3 | % voi = fu_voicing(y,sr <,opt>); 4 | % [voi zr] = fu_voicing(y,sr <,opt>); 5 | % Y: signal 6 | % SR: sample rate 7 | % VOI: vector with 1 element per window 8 | % 1: voiced 9 | % 0: voiceless/pause 10 | % ZR: do=='apply': vector of zero crossing rates, one value per window 11 | % 'train': opt struct with optimised .th and .zr_th and .err error 12 | % OPT: 13 | % .do: <'apply'>|train 14 | % .wl: window length <0.03> (<1: in s, >=1: in samples) 15 | % .th: <0.002> relative amplitude threshold; values of y below .th*max(abs(y)) are set to NaN 16 | % .sts: step size <0.01> (<1: in s, >=1: in samples) 17 | % .zr_th: <2000> (below & >0: voiced; use higher value for increased 18 | % recall, lower value for increased precision) 19 | % .min_nf: <3> (min number of frames in a row to be constantly 20 | % (un)voiced). Interpolation over shorter sequences 21 | % .ret: <'w'>|'smpl' 22 | % 'w': one value per window 23 | % 'smpl': one value per signal sample 24 | % IF .do equal 'train' 25 | % .errfun <@fu_voicing_err> 26 | % .ref: reference matrix or vector (see e.g. voi_ref.dat) 27 | % --> optimisation of .th and .zr_th 28 | % integrated training call by FU_VOI_OPTIM_BRACKET 29 | % 30 | % voicing detection by zero crossing rate 31 | % BEWARE: Default parameters are optimised on si1000p reference and 32 | % sts=0.01. If step size is changed, then $sts in sncl_ref.pl has to 33 | % be changed the same way!!! 34 | % param values are informally optimised on SI1000P reference data: 35 | % hamming: 0.1180 36 | % precision: 0.8898 37 | % recall: 0.9045 38 | 39 | if nargin < 3; opt=struct; end 40 | opt = fu_optstruct_init(opt,{'wl' 'th' 'sts' 'zr_th' 'do' 'min_nf' 'ret'},...
41 | {0.03 0.002 0.01 2000 'apply' 3 'w'}); 42 | opt.sr = sr; 43 | 44 | 45 | if strcmp(opt.do,'apply') %%%% application 46 | [voi zr] = fu_voicing_sub(y,opt); 47 | if nargout==2; zrr=zr; end 48 | else %%%%%%%%%%%%%%%%%%%%%%%% training 49 | %o_opt=optimset(@fminunc); 50 | o_opt=optimset(@fminsearch); 51 | o_opt=optimset('LargeScale','on'); 52 | w0=[0.004 1000]; 53 | %[w fval ef o]=fminunc(opt.errfun,w0,o_opt); 54 | [w fval ef o]=fminsearch(opt.errfun,w0,o_opt); 55 | opt.th=w(1); 56 | opt.zr_th=w(2); 57 | [voiv zr] = fu_voicing_sub(y,opt); 58 | % error 59 | voiv=fu_trim_vec(voiv,opt.ref,0); 60 | e = pdist([voiv;opt.ref],'hamming'); 61 | voi=opt; 62 | voi.err=e; 63 | if nargout==2; zrr=e; end % to avoid crash 64 | end 65 | 66 | return 67 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_voicing_sub.m: -------------------------------------------------------------------------------- 1 | function [voi zrr] = fu_voicing_sub(y,opt); 2 | 3 | % returns binary vector (1=voiced frame) for signal vector Y 4 | % and specs given in OPT 5 | % called by fu_voicing 6 | 7 | zr = fu_zero_crossing_rate(y,opt.sr,opt); 8 | voi=zeros(length(zr),1); 9 | voi(find(zr<opt.zr_th & zr>0))=1; 10 | 11 | if opt.min_nf>1 12 | voi=fu_smooth_binvec(voi,opt.min_nf); 13 | end 14 | 15 | 16 | if nargout==2; zrr=zr; end 17 | 18 | return 19 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_window_bnd.m: -------------------------------------------------------------------------------- 1 | function wb = fu_window_bnd(wl,ly,opt); 2 | 3 | %wb = fu_window_bnd(wl,ly,opt); 4 | %returns matrix of window on- and offset indices (one pair per row) 5 | %windows are centered on each index 1:opt.sts:ly 6 | % wl: window length 7 | % ly: length of vector 8 | % opt: 9 | % .sts: int <1> - step size 10 | % .idx: <0>|1 - if 1, not just the bounds but all indices in between are returned 11 | % e.g.
wl=2; ly=6; opt.sts=1; opt.idx=0; 12 | % --> wb = [1 2; 1 3; 2 4; 3 5; 4 6; 5 6] 13 | % opt.idx=1; 14 | % --> wb = [1 1 2; 1 2 3; 2 3 4; 3 4 5; 4 5 6; 5 6 6] 15 | %usable for vectorisation of algorithms 16 | 17 | if nargin<3; opt=struct; end 18 | opt = fu_optstruct_init(opt,{'sts' 'idx'},{1 0}); 19 | 20 | x=[1:opt.sts:ly]'; 21 | h=round(wl/2); 22 | 23 | if opt.idx==0 24 | hh = [-h h]; 25 | else 26 | hh= -h:h; 27 | end 28 | 29 | wb=repmat(hh,size(x,1),1)+repmat(x,1,size(hh,2)); 30 | wb(find(wb<1))=1; 31 | wb(find(wb>ly))=ly; 32 | 33 | return 34 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_window_vec.m: -------------------------------------------------------------------------------- 1 | function m = fu_window_vec(v,opt); 2 | 3 | % m = fu_window_vec(v,opt); 4 | % windows vector V according to specs in struct opt 5 | % V: input vector 6 | % M: matrix, one window per row 7 | % OPT: 8 | % .sts: <1> int, step size 9 | % .wl: <1> int, window length 10 | 11 | % opt init 12 | % idx is needed for fu_window_bnd, not to be specified by user 13 | %usable for vectorisation of algorithms 14 | 15 | if nargin<2; opt=struct; end 16 | opt = fu_optstruct_init(opt,{'sts' 'wl'},{1 1}); 17 | opt.idx=1; 18 | wb = fu_window_bnd(opt.wl,length(v),opt); 19 | 20 | m=v(wb); 21 | 22 | return 23 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_zero_crossing_rate.m: -------------------------------------------------------------------------------- 1 | function zr = fu_zero_crossing_rate(y,sr,opt); 2 | 3 | %zr = fu_zero_crossing_rate(y,sr [,opt]); 4 | %y: signal vector 5 | %sr: sample rate (<16000>) 6 | %opt 7 | % .wl: <0.01> window length (<1: in s, >=1: in samples) 8 | % .th: <0.004> min abs extreme point amplitude (vs. noise in pauses) 9 | % used as a factor: .th * max(abs(y)) ! 10 | % .sts: step size <1> (<1: in s, >=1: in samples) 11 | %zr: zero crossing rate in crossings/sec (same length as Y) 12 | % set all data points below .th to NaN 13 | % 14 | % center window of length .wl on each data point in Y 15 | % 16 | 17 | if nargin < 2; sr=16000; end 18 | if nargin < 3; opt=struct; end 19 | opt=fu_optstruct_init(opt,{'wl' 'th' 'sts'},{0.01 0.004 1}); 20 | 21 | % sec -> samples 22 | if opt.wl < 1; opt.wl = round(opt.wl*sr); end 23 | if opt.sts < 1; opt.sts = round(opt.sts*sr); end 24 | 25 | % filter values below threshold 26 | ya=abs(y); 27 | y(find(ya<opt.th*max(ya)))=NaN; 28 | 29 | % y(i)*y(i+1)<0 -> zero crossing 30 | zcv = [NaN; row2columnvec(y(1:end-1).*y(2:end))]; 31 | 32 | % -> matrix, one row per window 33 | zcm = fu_window_vec(zcv,opt); 34 | 35 | % zero crossings 36 | [ri ci] = find(zcm<=0); 37 | 38 | % how many zero crossings per window?
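% (fu_typecount, shipped alongside this file, evidently tallies how often
% each row index occurs in ri and returns [index count] pairs; that is why
% zcw(:,1) serves as indices and zcw(:,2) as per-window counts below)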
39 | zcw = fu_typecount(ri); 40 | 41 | % getting rate 42 | l=size(zcm,2); 43 | 44 | zr=zeros(size(zcm,1),1); 45 | zr(zcw(:,1)) = zcw(:,2) / l * sr; 46 | 47 | return 48 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/row2columnvec.m: -------------------------------------------------------------------------------- 1 | function v=row2columnvec(v) 2 | 3 | if length(v)==0; return; end 4 | 5 | if length(v(:,1))==1 6 | v=v'; 7 | end 8 | 9 | return 10 | -------------------------------------------------------------------------------- /praatScripts/get_pitch_and_intensity.praat: -------------------------------------------------------------------------------- 1 | # Based on http://www.fon.hum.uva.nl/praat/manual/Script_for_listing_time_--F0_--intensity.html 2 | # 3 | 4 | 5 | # Pitch and intensity parameters 6 | # male: 50, 350 7 | # female: 75, 450 8 | sampleStep = 0.01 9 | minPitch = 75 10 | maxPitch = 450 11 | 12 | 13 | # Directory needs a final '/' 14 | # **Both directories need to already exist** 15 | input_directory$ = "/Users/tmahrt/Desktop/experiments/LMEDS_studies/RPT_English/features_test/wav/female/" 16 | output_directory$ = "/Users/tmahrt/Desktop/experiments/LMEDS_studies/RPT_English/features_test/pitch_and_intensity_listings/" 17 | 18 | strings = Create Strings as file list... list 'input_directory$'*.wav 19 | numberOfFiles = Get number of strings 20 | for ifile to numberOfFiles 21 | selectObject: strings 22 | fileName$ = Get string: ifile 23 | Read from file: input_directory$ + fileName$ 24 | name$ = fileName$ - ".wav" 25 | 26 | sound = selected ("Sound") 27 | selectObject: sound 28 | tmin = Get start time 29 | tmax = Get end time 30 | 31 | To Pitch: sampleStep, minPitch, maxPitch 32 | Rename: "pitch" 33 | 34 | selectObject: sound 35 | To Intensity: minPitch, sampleStep 36 | Rename: "intensity" 37 | 38 | for i to (tmax-tmin)/sampleStep 39 | time = tmin + i * sampleStep 40 | selectObject: "Pitch pitch" 41 | pitch = Get value at time: time, "Hertz", "Linear" 42 | selectObject: "Intensity intensity" 43 | intensity = Get value at time: time, "Cubic" 44 | appendFileLine: "'output_directory$''name$'.txt", fixed$ (time, 2), ",", fixed$ (pitch, 3), ",", fixed$ (intensity, 3) 45 | endfor 46 | 47 | 48 | # Cleanup 49 | 50 | selectObject: "Pitch pitch" 51 | Remove 52 | 53 | selectObject: "Intensity intensity" 54 | Remove 55 | 56 | selectObject: sound 57 | Remove 58 | 59 | endfor 60 | 61 | selectObject: strings 62 | Remove 63 | 64 | -------------------------------------------------------------------------------- /praatScripts/psolaPitch.praat: -------------------------------------------------------------------------------- 1 | numSteps = %(num_steps)s 2 | 3 | Read from file... %(input_dir)s/%(input_name)s.wav 4 | 5 | for iStep to numSteps - 1 6 | zeroedI = iStep 7 | 8 | Read from file... %(pitch_dir)s/%(input_name)s_'zeroedI'.PitchTier 9 | select Sound %(input_name)s 10 | To Manipulation... 0.01 %(pitch_lower_bound)d %(pitch_upper_bound)d 11 | 12 | select PitchTier %(input_name)s_'zeroedI' 13 | plus Manipulation %(input_name)s 14 | Replace pitch tier 15 | 16 | select Manipulation %(input_name)s 17 | Get resynthesis (overlap-add) 18 | Save as WAV file... 
%(output_dir)s/%(output_name)s_'zeroedI'.wav 19 | Remove 20 | 21 | select Manipulation %(input_name)s 22 | Remove 23 | select PitchTier %(input_name)s_'zeroedI' 24 | Remove 25 | endfor 26 | 27 | select Sound %(input_name)s 28 | Remove -------------------------------------------------------------------------------- /pyacoustics/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 27, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | -------------------------------------------------------------------------------- /pyacoustics/aggregate_features.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 20, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | import io 10 | 11 | from pyacoustics.utilities import utils 12 | 13 | 14 | def aggregateFeatures(featurePath, featureList, headerStr=None): 15 | 16 | outputDir = join(featurePath, "aggr") 17 | utils.makeDir(outputDir) 18 | 19 | fnList = [] 20 | dataList = [] 21 | 22 | # Find the files that exist in all features 23 | for feature in featureList: 24 | fnSubList = utils.findFiles(join(featurePath, feature), filterExt=".txt") 25 | fnList.append(fnSubList) 26 | 27 | actualFNList = [] 28 | for featureFN in fnList[0]: 29 | if all([featureFN in subList for subList in fnList]): 30 | actualFNList.append(featureFN) 31 | 32 | for featureFN in actualFNList: 33 | dataList = [] 34 | for feature in featureList: 35 | featureDataList = utils.openCSV( 36 | join(featurePath, feature), featureFN, encoding="utf-8" 37 | ) 38 | dataList.append([",".join(row) for row in featureDataList]) 39 | 40 | name = os.path.splitext(featureFN)[0] 41 | 42 | dataList.insert(0, [name for _ in range(len(dataList[0]))]) 43 | tDataList = utils.safeZip(dataList, enforceLength=True) 44 | outputList = [",".join(row) for row in tDataList] 45 | outputTxt = "\n".join(outputList) 46 | 47 | outputFN = join(outputDir, name + ".csv") 48 | with io.open(outputFN, "w", encoding="utf-8") as fd: 49 | fd.write(outputTxt) 50 | 51 | # Cat all files together 52 | aggrOutput = [] 53 | 54 | if headerStr is not None: 55 | aggrOutput.append(headerStr) 56 | 57 | for fn in utils.findFiles(outputDir, filterExt=".csv"): 58 | if fn == "all.csv": 59 | continue 60 | with io.open(join(outputDir, fn), "r", encoding="utf-8") as fd: 61 | aggrOutput.append(fd.read()) 62 | 63 | with io.open(join(outputDir, "all.csv"), "w", encoding="utf-8") as fd: 64 | fd.write("\n".join(aggrOutput)) 65 | -------------------------------------------------------------------------------- /pyacoustics/intensity_and_pitch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/intensity_and_pitch/__init__.py -------------------------------------------------------------------------------- /pyacoustics/intensity_and_pitch/get_f0.py: -------------------------------------------------------------------------------- 1 | """ 2 | A Python implementation of ESPS's getF0 function 3 | 4 | The implementation is part of tkSnack. As I recall, it is a bit 5 | cumbersome to install, although there are python distributions, 6 | like ActiveState, which come with it preinstalled. 
For more information, 7 | visit the snack website: 8 | http://www.speech.kth.se/snack/ 9 | """ 10 | import os 11 | from os.path import join 12 | 13 | import Tkinter 14 | 15 | root = Tkinter.Tk() 16 | import tkSnack 17 | 18 | tkSnack.initializeSnack(root) 19 | 20 | 21 | from pyacoustics.utilities import utils 22 | 23 | MALE = "male" 24 | FEMALE = "female" 25 | 26 | SAMPLE_FREQ = 100 27 | 28 | 29 | def extractPitch(fnFullPath, minPitch, maxPitch): 30 | """ 31 | 32 | Former default pitch values: male (50, 350); female (75, 450) 33 | """ 34 | 35 | soundObj = tkSnack.Sound(load=fnFullPath) 36 | 37 | output = soundObj.pitch(method="ESPS", minpitch=minPitch, maxpitch=maxPitch) 38 | 39 | pitchList = [] 40 | for value in output: 41 | value = value[0] 42 | 43 | if value == 0: 44 | value = int(value) 45 | pitchList.append(value) 46 | 47 | return pitchList, SAMPLE_FREQ 48 | 49 | 50 | def getPitchAtTime(pitchList, startTime, endTime): 51 | startIndex = int(startTime * SAMPLE_FREQ) 52 | endIndex = int(endTime * SAMPLE_FREQ) 53 | 54 | return pitchList[startIndex:endIndex] 55 | 56 | 57 | if __name__ == "__main__": 58 | path = "/Users/tmahrt/Desktop/fire_new_audio_test" 59 | for name in utils.findFiles(path, filterExt=".wav", stripExt=True): 60 | tmpPitchList, _ = extractPitch(join(path, name + ".wav"), 75, 450) 61 | tmpPitchList = [str(val) for val in tmpPitchList] 62 | 63 | with open(join(path, name + "_f0.csv"), "w") as fd: 64 | fd.write("\n".join(tmpPitchList)) 65 | -------------------------------------------------------------------------------- /pyacoustics/morph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/morph/__init__.py -------------------------------------------------------------------------------- /pyacoustics/morph/intensity_morph.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Apr 2, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | 10 | import math 11 | import copy 12 | 13 | from pyacoustics.morph.morph_utils import common 14 | from pyacoustics.morph.morph_utils import plot_morphed_data 15 | from pyacoustics.utilities import utils 16 | from pyacoustics.utilities import sequences 17 | from pyacoustics.signals import audio_scripts 18 | from pyacoustics.utilities import my_math 19 | 20 | 21 | def intensityMorph( 22 | fromWavFN, 23 | toWavFN, 24 | fromWavTGFN, 25 | toWavTGFN, 26 | tierName, 27 | numSteps, 28 | coreChunkSize, 29 | plotFlag, 30 | ): 31 | fromDataTupleList = common.getIntervals(fromWavTGFN, tierName) 32 | toDataTupleList = common.getIntervals(toWavTGFN, tierName) 33 | 34 | outputName = os.path.splitext(fromWavFN)[0] + "_int_" + tierName 35 | 36 | _intensityMorph( 37 | fromWavFN, 38 | toWavFN, 39 | fromDataTupleList, 40 | toDataTupleList, 41 | numSteps, 42 | coreChunkSize, 43 | plotFlag, 44 | outputName, 45 | ) 46 | 47 | 48 | def _intensityMorph( 49 | fromWavFN, 50 | toWavFN, 51 | fromDataTupleList, 52 | toDataTupleList, 53 | numSteps, 54 | coreChunkSize, 55 | plotFlag, 56 | outputName=None, 57 | ): 58 | if outputName is None: 59 | outputName = os.path.splitext(fromWavFN)[0] + "_int" 60 | 61 | outputDir = join(os.path.split(fromWavFN)[0], "output") 62 | utils.makeDir(outputDir) 63 | 64 | # Determine the multiplication values to be used in normalization 65 | # - this extracts one value per chunk 66 | expectedLength = 0 67 |
normFactorList = [] 68 | truncatedToList = [] 69 | chunkSizeList = [] 70 | fromDataList = [] 71 | 72 | fromParams = audio_scripts.getParams(fromWavFN) 73 | toParams = audio_scripts.getParams(toWavFN) 74 | 75 | for fromTuple, toTuple in zip(fromDataTupleList, toDataTupleList): 76 | fromStart, fromEnd = fromTuple[:2] 77 | toStart, toEnd = toTuple[:2] 78 | 79 | expectedLength += (fromEnd - fromStart) * fromParams[2] 80 | 81 | fromDataList.extend(fromSubWav.rawDataList) 82 | 83 | normFactorListTmp, a = getRelativeNormalizedFactors( 84 | fromSubWav, toSubWav, coreChunkSize 85 | ) 86 | tmpChunkList = [tmpChunkSize for value, tmpChunkSize in normFactorListTmp] 87 | chunkSizeList.append(sum(tmpChunkList)) 88 | normFactorList.extend(normFactorListTmp) 89 | truncatedToList.extend(a) 90 | 91 | interpolatedResults = [] 92 | normFactorGen = [ 93 | sequences.interp(1.0, factor[0], numSteps) for factor in normFactorList 94 | ] 95 | tmpChunkSizeList = [factor[1] for factor in normFactorList] 96 | for i in xrange(numSteps): 97 | outputFN = "%s_s%d_%d_%d.wav" % (outputName, coreChunkSize, numSteps - 1, i) 98 | 99 | tmpNormFactorList = [next(normFactorGen[j]) for j in xrange(len(normFactorGen))] 100 | 101 | # Skip the first value (same as the input value) 102 | if i == 0: 103 | continue 104 | 105 | tmpInputList = zip(tmpNormFactorList, tmpChunkSizeList) 106 | 107 | normalizationTuple = expandNormalizationFactors(tmpInputList) 108 | expandedNormFactorList = normalizationTuple[0] 109 | 110 | # It happened once that the expanded factor list was off by one value 111 | # -- I could not determine why, so this is just a cheap hack 112 | if len(expandedNormFactorList) == (expectedLength - 1): 113 | expandedNormFactorList.append(expandedNormFactorList[-1]) 114 | 115 | # print("Diff: ", expectedLength, len(expandedNormFactorList)) 116 | assert expectedLength == len(expandedNormFactorList) 117 | 118 | newWavObj = copy.deepcopy(fromWavObj) 119 | newRawDataList = [] 120 | 121 | # Apply the normalization and reinsert the data back 122 | # into the original file 123 | offset = 0 124 | for fromTuple, chunkSize in zip(fromDataTupleList, chunkSizeList): 125 | fromStart, fromEnd = fromTuple[:2] 126 | fromSubWav = fromWavObj.extractSubsegment(fromStart, fromEnd) 127 | assert len(fromSubWav.rawDataList) == len( 128 | expandedNormFactorList[offset : offset + chunkSize] 129 | ) 130 | 131 | tmpList = [ 132 | fromSubWav.rawDataList, 133 | expandedNormFactorList[offset : offset + chunkSize], 134 | ] 135 | subRawDataList = [ 136 | value * normFactor 137 | for value, normFactor in utils.safeZip(tmpList, enforceLength=True) 138 | ] 139 | newRawDataList.extend(subRawDataList) 140 | 141 | offset += chunkSize 142 | 143 | newWavObj = audio.WavObj(newRawDataList, fromWavObj.samplingRate) 144 | newWavObj.save(join(outputDir, outputFN)) 145 | 146 | interpolatedResults.append(newWavObj.rawDataList) 147 | 148 | plotFN = "%s_s%d_%d.png" % (outputFN, coreChunkSize, numSteps) 149 | 150 | if plotFlag: 151 | plotMorphedData.plotIntensity( 152 | fromDataList, 153 | truncatedToList, 154 | interpolatedResults, 155 | expandedNormFactorList, 156 | os.path.join(outputDir, plotFN), 157 | ) 158 | 159 | 160 | def getNormalizationFactor(lst, refLst=None): 161 | """""" 162 | 163 | # Get the source values that we will be normalizing 164 | lst = list(set(lst)) 165 | if 0 in lst: 166 | lst.pop(lst.index(0)) 167 | 168 | actMaxV = float(max(lst)) 169 | actMinV = float(min(lst)) 170 | 171 | # Get the reference values 172 | if refLst is None: 173 | refMaxV = 32767.0 
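# (32767/-32767 are the full-scale bounds of the 16-bit PCM samples this
# package reads elsewhere via struct.unpack with the "h" format)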
174 | refMinV = -32767.0 175 | else: 176 | refLst = list(set(refLst)) 177 | if 0 in refLst: 178 | refLst.pop(refLst.index(0)) 179 | 180 | refMaxV = float(max(refLst)) 181 | refMinV = float(min(refLst)) 182 | 183 | actualFactor = min(refMaxV / actMaxV, abs(refMinV) / abs(actMinV)) 184 | # print("Normalization factor: ", actualFactor) 185 | 186 | return actualFactor 187 | 188 | 189 | def getRelativeNormalizedFactors(fromDataList, toDataList, chunkSize): 190 | """ 191 | Determines the factors to be used to normalize sourceWav from targetWav 192 | 193 | This can be used to relatively normalize the source based on the target 194 | on an iterative basis (small chunks are normalized rather than the entire 195 | wav. 196 | """ 197 | 198 | # Sample proportionately from the targetWav 199 | # - if the two lists are the same length, there is no change 200 | # - if /target/ is shorter, it will be lengthened with some repeated values 201 | # - if /target/ is longer, it will be shortened with some values dropped 202 | tmpIndexList = sequences.interp(0, len(toDataList) - 1, fromDataList) 203 | newTargetRawDataList = [toDataList[int(round(i))] for i in tmpIndexList] 204 | 205 | assert len(fromDataList) == len(newTargetRawDataList) 206 | 207 | fromGen = sequences.subsequenceGenerator( 208 | fromDataList, chunkSize, sequences.sampleMiddle, sequences.DO_SAMPLE_GATED 209 | ) 210 | toGen = sequences.subsequenceGenerator( 211 | newTargetRawDataList, 212 | chunkSize, 213 | sequences.sampleMiddle, 214 | sequences.DO_SAMPLE_GATED, 215 | ) 216 | 217 | normFactorList = [] 218 | i = 0 219 | for fromTuple, toTuple in zip(fromGen, toGen): 220 | fromDataChunk = fromTuple[0] 221 | toDataChunk = toTuple[0] 222 | distToNextControlPoint = fromTuple[2] 223 | normFactor = getNormalizationFactor(fromDataChunk, toDataChunk) 224 | normFactorList.append((normFactor, distToNextControlPoint)) 225 | # i += 1 226 | # if i >= 38: 227 | # print("hello") 228 | 229 | # print(len(sourceWav.rawDataList), allChunks) 230 | # assert(len(sourceWav.rawDataList) == allChunks) 231 | return normFactorList, newTargetRawDataList 232 | 233 | 234 | def expandNormalizationFactors(normFactorList): 235 | """ 236 | Expands the normFactorList from being chunk-based to sample-based 237 | 238 | E.g. A wav with 1000 samples may be represented by a factorList of 5 chunks 239 | (5 factor values). This function will expand that to 1000. 
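A rough worked example (assuming my_math.linspace is numpy-like and
endpoint-inclusive): normFactorList = [(2.0, 3), (4.0, 2)] expands to
linspace(2.0, 4.0, 3) for the first chunk followed by the repeated final
factor linspace(4.0, 4.0, 2), i.e. approximately [2.0, 3.0, 4.0, 4.0, 4.0].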
240 | """ 241 | 242 | i = 0 243 | normFactorsFull = [] 244 | controlPoints = [] 245 | while i < len(normFactorList) - 1: 246 | startVal, chunkSize = normFactorList[i] 247 | endVal = normFactorList[i + 1][0] 248 | normFactorsFull.extend(my_math.linspace(startVal, endVal, chunkSize)) 249 | 250 | controlPoints.append(startVal) 251 | controlPoints.extend(my_math.linspace(startVal, startVal, chunkSize - 1)) 252 | i += 1 253 | 254 | # We have no more data, so just repeat the final norm factor at the tail 255 | # of the file 256 | value, finalChunkSize = normFactorList[i] 257 | controlPoints.append(value) 258 | controlPoints.extend(my_math.linspace(startVal, startVal, finalChunkSize - 1)) 259 | normFactorsFull.extend(my_math.linspace(value, value, finalChunkSize)) 260 | 261 | print("Norm factors full: %d" % len(normFactorsFull)) 262 | return normFactorsFull, controlPoints 263 | -------------------------------------------------------------------------------- /pyacoustics/signals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/signals/__init__.py -------------------------------------------------------------------------------- /pyacoustics/signals/audio_scripts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Aug 23, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | 10 | import struct 11 | import wave 12 | import audioop 13 | 14 | from pyacoustics.utilities import utils 15 | 16 | 17 | def loadWavFile(wavFN): 18 | sampWidthDict = {1: "b", 2: "h", 4: "i", 8: "q"} 19 | audiofile = wave.open(wavFN, "r") 20 | 21 | params = audiofile.getparams() 22 | sampwidth = params[1] 23 | nframes = params[3] 24 | 25 | byteCode = sampWidthDict[sampwidth] 26 | waveData = audiofile.readframes(nframes) 27 | audioFrameList = struct.unpack("<" + byteCode * nframes, waveData) 28 | 29 | return audioFrameList, params 30 | 31 | 32 | def resampleAudio(soxEXE, newSampleRate, inputPath, fn, outputPath=None): 33 | """ 34 | 35 | Mac: "/opt/local/bin/sox" 36 | Windows: "C:\Program Files (x86)\sox-14-4-2\sox.exe" 37 | """ 38 | if outputPath is None: 39 | outputPath = join(inputPath, "resampled_wavs") 40 | utils.makeDir(outputPath) 41 | 42 | soxCmd = "%s %s -r %f %s rate -v 96k" % ( 43 | soxEXE, 44 | join(inputPath, fn), 45 | newSampleRate, 46 | join(outputPath, fn), 47 | ) 48 | os.system(soxCmd) 49 | 50 | 51 | def getSerializedFileDuration(fn): 52 | name = os.path.splitext(fn)[0] 53 | durationFN = name + "_duration.txt" 54 | if not os.path.exists(durationFN): 55 | duration = getSoundFileDuration(fn) 56 | try: 57 | with open(durationFN, "w") as fd: 58 | fd.write(str(duration)) 59 | except IOError: 60 | # If we don't have write permissions, there isn't anything we can 61 | # do, the user should still be able to get their data 62 | pass 63 | else: 64 | with open(durationFN, "r") as fd: 65 | duration = float(fd.read()) 66 | 67 | return duration 68 | 69 | 70 | def getSoundFileDuration(fn): 71 | """ 72 | Returns the duration of a wav file (in seconds) 73 | """ 74 | audiofile = wave.open(fn, "r") 75 | 76 | params = audiofile.getparams() 77 | framerate = params[2] 78 | nframes = params[3] 79 | 80 | duration = float(nframes) / framerate 81 | return duration 82 | 83 | 84 | def getParams(fn): 85 | audiofile = wave.open(fn, "r") 86 | 87 | params = audiofile.getparams() 88 | 89 | return params 90 | 
91 | 92 | def reduceToSingleChannel(fn, outputFN, leftFactor=1, rightFactor=0): 93 | audiofile = wave.open(fn, "r") 94 | 95 | params = audiofile.getparams() 96 | sampwidth = params[1] 97 | nframes = params[3] 98 | audioFrames = audiofile.readframes(nframes) 99 | 100 | monoAudioFrames = audioop.tomono(audioFrames, sampwidth, leftFactor, rightFactor) 101 | params = tuple( 102 | [ 103 | 1, 104 | ] 105 | + list(params[1:]) 106 | ) 107 | 108 | outputAudiofile = wave.open(outputFN, "w") 109 | outputAudiofile.setparams(params) 110 | outputAudiofile.writeframes(monoAudioFrames) 111 | 112 | 113 | def modifySampleWidth(fn, outputFN, newSampleWidth): 114 | sampWidthDict = {1: "b", 2: "h", 4: "i", 8: "q"} 115 | 116 | audiofile = wave.open(fn, "r") 117 | params = audiofile.getparams() 118 | sampwidth = params[1] 119 | nframes = params[3] 120 | waveData = audiofile.readframes(nframes) 121 | 122 | sampleCode = sampWidthDict[sampwidth] 123 | newSampleCode = sampWidthDict[newSampleWidth] 124 | 125 | audioFrameList = struct.unpack("<" + sampleCode * nframes, waveData) 126 | outputByteStr = struct.pack("<" + newSampleCode * nframes, *audioFrameList) 127 | 128 | if newSampleWidth is not None: 129 | params = ( 130 | list(params[:2]) 131 | + [ 132 | newSampleWidth, 133 | ] 134 | + list(params[3:]) 135 | ) 136 | params = tuple(params) 137 | 138 | outputAudiofile = wave.open(outputFN, "w") 139 | outputAudiofile.setparams(params) 140 | outputAudiofile.writeframes(outputByteStr) 141 | 142 | 143 | def monoToStereo(fnL, fnR, outputFN, lfactor=1.0, rfactor=1.0): 144 | """ 145 | Given two audio files, combines them into a stereo audio file 146 | 147 | Derived mostly from the official python documentation 148 | https://docs.python.org/2/library/audioop.html 149 | """ 150 | 151 | def _monoToStereo(fn, leftBalance, rightBalance): 152 | audiofile = wave.open(fn, "r") 153 | params = audiofile.getparams() 154 | sampwidth = params[1] 155 | nframes = params[3] 156 | 157 | waveData = audiofile.readframes(nframes) 158 | sample = audioop.tostereo(waveData, sampwidth, leftBalance, rightBalance) 159 | 160 | return sample, params 161 | 162 | lsample, params = _monoToStereo(fnL, lfactor, 1 - lfactor) 163 | rsample = _monoToStereo(fnR, 1 - rfactor, rfactor)[0] 164 | 165 | sampwidth, framerate, nframes, comptype, compname = params[1:] 166 | 167 | stereoSamples = audioop.add(lsample, rsample, sampwidth) 168 | 169 | outputAudiofile = wave.open(outputFN, "w") 170 | 171 | params = [2, sampwidth, framerate, nframes, comptype, compname] 172 | outputAudiofile.setparams(params) 173 | outputAudiofile.writeframes(stereoSamples) 174 | 175 | 176 | def splitStereoAudio(path, fn, outputPath=None): 177 | if outputPath is None: 178 | outputPath = join(path, "split_audio") 179 | 180 | if not os.path.exists(outputPath): 181 | os.mkdir(outputPath) 182 | 183 | name = os.path.splitext(fn)[0] 184 | 185 | fnFullPath = join(path, fn) 186 | leftOutputFN = join(outputPath, "%s_L.wav" % name) 187 | rightOutputFN = join(outputPath, "%s_R.wav" % name) 188 | 189 | audiofile = wave.open(fnFullPath, "r") 190 | 191 | params = audiofile.getparams() 192 | sampwidth = params[1] 193 | nframes = params[3] 194 | audioFrames = audiofile.readframes(nframes) 195 | 196 | for leftFactor, rightFactor, outputFN in ( 197 | (1, 0, leftOutputFN), 198 | (0, 1, rightOutputFN), 199 | ): 200 | monoAudioFrames = audioop.tomono( 201 | audioFrames, sampwidth, leftFactor, rightFactor 202 | ) 203 | params = tuple( 204 | [ 205 | 1, 206 | ] 207 | + list(params[1:]) 208 | ) 209 | 210 | 
outputAudiofile = wave.open(outputFN, "w") 211 | outputAudiofile.setparams(params) 212 | outputAudiofile.writeframes(monoAudioFrames) 213 | 214 | 215 | def getSubwav(fn, startT, endT, singleChannelFlag): 216 | audiofile = wave.open(fn, "r") 217 | 218 | params = audiofile.getparams() 219 | nchannels = params[0] 220 | sampwidth = params[1] 221 | framerate = params[2] 222 | 223 | # Extract the audio frames 224 | audiofile.setpos(int(framerate * startT)) 225 | audioFrames = audiofile.readframes(int(framerate * (endT - startT))) 226 | 227 | # Convert to single channel if needed 228 | if singleChannelFlag is True and nchannels > 1: 229 | audioFrames = audioop.tomono(audioFrames, sampwidth, 1, 0) 230 | nchannels = 1 231 | 232 | return audioFrames 233 | 234 | 235 | def extractSubwav(fn, outputFN, startT, endT, singleChannelFlag): 236 | audiofile = wave.open(fn, "r") 237 | params = audiofile.getparams() 238 | nchannels = params[0] 239 | sampwidth = params[1] 240 | framerate = params[2] 241 | comptype = params[4] 242 | compname = params[5] 243 | 244 | print([fn, startT, endT]) 245 | audioFrames = getSubwav(fn, startT, endT, singleChannelFlag) 246 | 247 | if singleChannelFlag is True and nchannels > 1: 248 | nchannels = 1 249 | 250 | outParams = [nchannels, sampwidth, framerate, len(audioFrames), comptype, compname] 251 | 252 | outWave = wave.open(outputFN, "w") 253 | outWave.setparams(outParams) 254 | outWave.writeframes(audioFrames) 255 | -------------------------------------------------------------------------------- /pyacoustics/signals/data_fitting.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jul 6, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | from sklearn import mixture 8 | 9 | from scipy import stats 10 | from scipy.stats import norm 11 | import matplotlib.pyplot as plot 12 | import matplotlib.mlab 13 | import numpy as np 14 | 15 | 16 | def getPDF(ddata, numSamples=50, minV=None, maxV=None): 17 | pdf = stats.gaussian_kde(ddata) 18 | 19 | if minV is None: 20 | minV = min(ddata) 21 | if maxV is None: 22 | maxV = max(ddata) 23 | 24 | xValues = np.linspace(minV, maxV, numSamples) 25 | 26 | yValues = pdf(xValues) 27 | 28 | return xValues, yValues 29 | 30 | 31 | def getBimodalValley(data, numSamples=100, doplot=True): 32 | """ 33 | Returns the smallest value between the peaks of a bimodal distribution 34 | """ 35 | 36 | # Build GMM, fit it to the data, and get GMM parameters 37 | # The two means are used as the start and end point of a our search 38 | # for the smallest value between the two distributions. 
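# For example, data pooled from two roughly normal clusters centered near
# 100 and 200 should give fitted means close to those peaks, and the KDE
# minimum searched between them is returned as the valley.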
39 | 40 | ncomp = 2 # Could be parameterized later if needed 41 | 42 | clf = mixture.GaussianMixture(n_components=ncomp, covariance_type="full") 43 | clf.fit( 44 | [ 45 | [ 46 | item, 47 | ] 48 | for item in data 49 | ] 50 | ) 51 | ml = clf.means_ 52 | wl = clf.weights_ 53 | cl = clf.covariances_ 54 | ms = [m[0] for m in ml] 55 | cs = [np.sqrt(c[0][0]) for c in cl] 56 | ws = [w for w in wl] 57 | 58 | # Find the smallest point in the pdf between the means 59 | startV = int(min(ms)) 60 | endV = int(max(ms)) 61 | 62 | pdfX, pdfY = getPDF(data, numSamples, startV, endV) 63 | minY = min(pdfY) 64 | minX = pdfX[[float(x) for x in pdfY].index(minY)] 65 | 66 | # Plot result if requested 67 | if doplot is True: 68 | histo = plot.hist(data, numSamples) 69 | for w, m, c in zip(ws, ms, cs): 70 | normedPDF = norm.pdf(histo[1], m, np.sqrt(c)) 71 | plot.plot(histo[1], w * normedPDF, linewidth=3) 72 | plot.plot(pdfX, pdfY, linewidth=2) 73 | plot.axvline(minX) 74 | plot.show() 75 | 76 | return minX 77 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/speech_detection/__init__.py -------------------------------------------------------------------------------- /pyacoustics/speech_detection/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jun 7, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import struct 8 | import wave 9 | import math 10 | 11 | from pyacoustics.signals import audio_scripts 12 | 13 | 14 | class EndOfAudioData(Exception): 15 | pass 16 | 17 | 18 | def getSoundFileDuration(fn): 19 | """ 20 | Returns the duration of a wav file (in seconds) 21 | """ 22 | audiofile = wave.open(fn, "r") 23 | 24 | params = audiofile.getparams() 25 | framerate = params[2] 26 | nframes = params[3] 27 | 28 | duration = float(nframes) / framerate 29 | return duration 30 | 31 | 32 | def openAudioFile(fn): 33 | audiofile = wave.open(fn, "r") 34 | 35 | params = audiofile.getparams() 36 | sampwidth = params[1] 37 | framerate = params[2] 38 | 39 | return audiofile, sampwidth, framerate 40 | 41 | 42 | def rms(audioFrameList): 43 | audioFrameList = [val**2 for val in audioFrameList] 44 | meanVal = sum(audioFrameList) / len(audioFrameList) 45 | return math.sqrt(meanVal) 46 | 47 | 48 | def overlapCheck(interval, cmprInterval, percentThreshold=0): 49 | """Checks whether two intervals overlap""" 50 | 51 | startTime, endTime = interval[0], interval[1] 52 | cmprStartTime, cmprEndTime = cmprInterval[0], cmprInterval[1] 53 | 54 | overlapTime = min(endTime, cmprEndTime) - max(startTime, cmprStartTime) 55 | overlapTime = max(0, overlapTime) 56 | overlapFlag = overlapTime > 0 57 | 58 | if percentThreshold > 0 and overlapFlag: 59 | totalTime = max(endTime, cmprEndTime) - min(startTime, cmprStartTime) 60 | percentOverlap = overlapTime / float(totalTime) 61 | 62 | overlapFlag = percentOverlap >= percentThreshold 63 | 64 | return overlapFlag 65 | 66 | 67 | def getMinMaxAmplitude(wavFN, stepSize, entryList=None): 68 | audiofile = openAudioFile(wavFN)[0] 69 | 70 | # By default, find the min and max amplitude for the whole file 71 | if entryList is None: 72 | stop = audio_scripts.getSoundFileDuration(wavFN) 73 | entryList = [ 74 | (0, stop), 75 | ] 76 | 77 | # Accumulate relevant energy values 78 | rmsList = [] 79 | for 
entry in entryList: 80 | start, stop = entry[0], entry[1] 81 | currentTime = start 82 | while currentTime < stop: 83 | rmsList.append(rmsNextFrames(audiofile, stepSize)) 84 | currentTime += stepSize 85 | 86 | # Return the min and max values 87 | minValue = min(rmsList) 88 | maxValue = max(rmsList) 89 | 90 | return minValue, maxValue 91 | 92 | 93 | def rmsNextFrames(audiofile, stepSize, normMinVal=None, normMaxVal=None): 94 | params = audiofile.getparams() 95 | sampwidth, framerate = params[1], params[2] 96 | 97 | numFrames = int(framerate * stepSize) 98 | waveData = audiofile.readframes(numFrames) 99 | 100 | if len(waveData) == 0: 101 | raise EndOfAudioData() 102 | 103 | actualNumFrames = int(len(waveData) / float(sampwidth)) 104 | audioFrameList = struct.unpack("<" + "h" * actualNumFrames, waveData) 105 | 106 | rmsEnergy = rms(audioFrameList) 107 | 108 | if normMinVal is not None and normMaxVal is not None: 109 | rmsEnergy = (rmsEnergy - normMinVal) / (normMaxVal - normMinVal) 110 | 111 | return rmsEnergy 112 | 113 | 114 | def mergeAdjacentEntries(entryList): 115 | i = 0 116 | while i < len(entryList) - 1: 117 | if entryList[i][1] == entryList[i + 1][0]: 118 | startEntry = entryList.pop(i) 119 | nextEntry = entryList.pop(i) 120 | 121 | entryList.insert(i, (startEntry[0], nextEntry[1])) 122 | else: 123 | i += 1 124 | 125 | return entryList 126 | 127 | 128 | def cropUnusedPortion(entry, start, stop): 129 | retEntryList = [] 130 | 131 | if entry[0] < start: 132 | retEntryList.append((entry[0], start)) 133 | 134 | if entry[1] > stop: 135 | retEntryList.append((stop, entry[1])) 136 | 137 | return retEntryList 138 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/naive_vad.py: -------------------------------------------------------------------------------- 1 | import wave 2 | 3 | from pyacoustics.speech_detection import common 4 | 5 | 6 | def _findNextEvent( 7 | sampleList, 8 | startTime, 9 | silenceThreshold, 10 | sampleFreq, 11 | stepSize, 12 | numSteps, 13 | findSilence=True, 14 | ): 15 | """ 16 | 17 | if findSilence=False then search for sound 18 | """ 19 | 20 | # Extract the audio frames 21 | i = 0 22 | currentSequenceNum = 0 23 | while currentSequenceNum < numSteps: 24 | currentTime = startTime + i * stepSize 25 | nextTime = startTime + (i + 1) * stepSize 26 | 27 | audioFrameList = sampleList[ 28 | int(round(currentTime * sampleFreq)) : int(round(nextTime * sampleFreq)) 29 | ] 30 | 31 | if len(audioFrameList) == 0: 32 | raise common.EndOfAudioData() 33 | 34 | rmsEnergy = common.rms(audioFrameList) 35 | 36 | if (findSilence is True and rmsEnergy < silenceThreshold) or ( 37 | findSilence is False and rmsEnergy > silenceThreshold 38 | ): 39 | currentSequenceNum += 1 40 | else: 41 | currentSequenceNum = 0 42 | i += 1 43 | 44 | endTime = startTime + (i - numSteps) * stepSize 45 | 46 | return endTime 47 | 48 | 49 | def naiveVAD( 50 | sampleList, silenceThreshold, sampleFreq, stepSize, numSteps, startTime=0.0 51 | ): 52 | endTime = _findNextEvent( 53 | sampleList, 54 | startTime, 55 | silenceThreshold, 56 | sampleFreq, 57 | stepSize, 58 | numSteps, 59 | findSilence=True, 60 | ) 61 | 62 | # Each iteration begins at a non-silence event and ends in a new 63 | # silence event (i.e. 
spans the interval of the non-silence) 64 | entryList = [] 65 | try: 66 | while True: 67 | startTime = _findNextEvent( 68 | sampleList, 69 | endTime, 70 | silenceThreshold, 71 | sampleFreq, 72 | stepSize, 73 | numSteps, 74 | findSilence=False, 75 | ) 76 | 77 | endTime = _findNextEvent( 78 | sampleList, 79 | startTime, 80 | silenceThreshold, 81 | sampleFreq, 82 | stepSize, 83 | numSteps, 84 | findSilence=True, 85 | ) 86 | entryList.append((startTime, endTime)) 87 | 88 | except (common.EndOfAudioData, wave.Error): 89 | pass  # Stop processing 90 | 91 | return entryList 92 | 93 | 94 | def getIntensityPercentile(sampleList, cutoffPercent): 95 | """ 96 | Returns the nth percentile of the energy values in a dataset 97 | """ 98 | tmpSampleList = sorted(sampleList) 99 | 100 | return tmpSampleList[int(len(tmpSampleList) * cutoffPercent)] 101 | 102 | 103 | def cropSilenceInEdges(sampleList, silenceThreshold, sampleFreq): 104 | """ 105 | Returns the left and right boundaries of the meaningful data in a wav file 106 | """ 107 | startI = 0 108 | while sampleList[startI] < silenceThreshold: 109 | startI += 1 110 | 111 | endI = len(sampleList) - 1 112 | while sampleList[endI] < silenceThreshold: 113 | endI -= 1 114 | 115 | startTime = startI / sampleFreq 116 | endTime = endI / sampleFreq 117 | 118 | return startTime, endTime 119 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/naive_vad_efficient.py: -------------------------------------------------------------------------------- 1 | import math 2 | import struct 3 | import wave 4 | 5 | from pyacoustics.speech_detection import common 6 | 7 | 8 | def findNextEvent( 9 | fn, startTime, silenceThreshold, stepSize, numSteps, findSilence=True 10 | ): 11 | """ 12 | 13 | if findSilence=False then search for sound 14 | """ 15 | 16 | audiofile, sampwidth, framerate = common.openAudioFile(fn) 17 | 18 | # Extract the audio frames 19 | i = 0 20 | currentSequenceNum = 0 21 | audiofile.setpos(int(framerate * startTime)) 22 | while currentSequenceNum < numSteps: 23 | numFrames = int(framerate * stepSize) 24 | waveData = audiofile.readframes(numFrames) 25 | 26 | if len(waveData) == 0: 27 | raise common.EndOfAudioData() 28 | 29 | actualNumFrames = int(len(waveData) / float(sampwidth)) 30 | audioFrameList = struct.unpack("<" + "h" * actualNumFrames, waveData) 31 | 32 | rmsEnergy = common.rms(audioFrameList) 33 | print(rmsEnergy) 34 | 35 | if (findSilence is True and rmsEnergy < silenceThreshold) or ( 36 | findSilence is False and rmsEnergy > silenceThreshold 37 | ): 38 | currentSequenceNum += 1 39 | else: 40 | currentSequenceNum = 0 41 | i += 1 42 | 43 | endTime = startTime + (i - numSteps) * stepSize 44 | 45 | return endTime 46 | 47 | 48 | def naiveVAD(wavFN, silenceThreshold, stepSize, numSteps, startTime=0.0): 49 | endTime = findNextEvent( 50 | wavFN, startTime, silenceThreshold, stepSize, numSteps, findSilence=True 51 | ) 52 | 53 | # Each iteration begins at a non-silence event and ends in a new 54 | # silence event (i.e.
spans the interval of the non-silence) 55 | entryList = [] 56 | try: 57 | while True: 58 | startTime = findNextEvent( 59 | wavFN, endTime, silenceThreshold, stepSize, numSteps, findSilence=False 60 | ) 61 | 62 | endTime = findNextEvent( 63 | wavFN, startTime, silenceThreshold, stepSize, numSteps, findSilence=True 64 | ) 65 | entryList.append((startTime, endTime)) 66 | 67 | except (common.EndOfAudioData, wave.Error): 68 | pass # Stop processing 69 | 70 | return entryList 71 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/segment_stereo_speech.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Nov 4, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | from pyacoustics.speech_detection import common 8 | 9 | 10 | def findNextSpeaker( 11 | leftSamples, 12 | rightSamples, 13 | samplingFreq, 14 | startTime, 15 | analyzeStop, 16 | stepSize, 17 | numSteps, 18 | findLeft=True, 19 | ): 20 | """""" 21 | 22 | # Extract the audio frames 23 | i = 0 24 | currentSequenceNum = 0 25 | while currentSequenceNum < numSteps: 26 | # Stop analyzing once we've reached the end of this interval 27 | currentTime = startTime + i * stepSize 28 | nextTime = startTime + ((i + 1) * stepSize) 29 | 30 | if nextTime > analyzeStop: 31 | raise common.EndOfAudioData() 32 | 33 | leftRMSEnergy = common.rms( 34 | leftSamples[int(currentTime * samplingFreq) : int(nextTime * samplingFreq)] 35 | ) 36 | rightRMSEnergy = common.rms( 37 | rightSamples[int(currentTime * samplingFreq) : int(nextTime * samplingFreq)] 38 | ) 39 | 40 | if (findLeft is True and leftRMSEnergy >= rightRMSEnergy) or ( 41 | findLeft is False and leftRMSEnergy <= rightRMSEnergy 42 | ): 43 | currentSequenceNum += 1 44 | else: 45 | currentSequenceNum = 0 46 | i += 1 47 | 48 | endTime = startTime + (i - numSteps) * stepSize 49 | 50 | return endTime 51 | 52 | 53 | def assignAudioEventsForEntries( 54 | leftSamples, 55 | rightSamples, 56 | samplingFreq, 57 | leftEntry, 58 | rightEntry, 59 | stepSize, 60 | speakerNumSteps, 61 | ): 62 | """ 63 | Start up and tear down function for assignAudioEvents() 64 | """ 65 | 66 | # Find the overlap interval and preserve the non-overlapped portions 67 | start = max(leftEntry[0], rightEntry[0]) 68 | stop = min(leftEntry[1], rightEntry[1]) 69 | 70 | leftEntryList = common.cropUnusedPortion(leftEntry, start, stop) 71 | rightEntryList = common.cropUnusedPortion(rightEntry, start, stop) 72 | 73 | # Determine who is speaking in overlapped portions 74 | tmpEntries = assignAudioEvents( 75 | leftSamples, rightSamples, samplingFreq, start, stop, stepSize, speakerNumSteps 76 | ) 77 | 78 | leftEntryList.extend(tmpEntries[0]) 79 | rightEntryList.extend(tmpEntries[1]) 80 | 81 | # Merge adjacent regions sharing a boundary, if any 82 | leftEntryList.sort() 83 | rightEntryList.sort() 84 | 85 | leftEntryList = common.mergeAdjacentEntries(leftEntryList) 86 | rightEntryList = common.mergeAdjacentEntries(rightEntryList) 87 | 88 | return leftEntryList, rightEntryList 89 | 90 | 91 | def assignAudioEvents( 92 | leftSamples, 93 | rightSamples, 94 | samplingFreq, 95 | startTime, 96 | analyzeStop, 97 | stepSize, 98 | speakerNumSteps, 99 | ): 100 | findLeft = True 101 | leftEntryList = [] 102 | rightEntryList = [] 103 | try: 104 | while True: 105 | endTime = findNextSpeaker( 106 | leftSamples, 107 | rightSamples, 108 | samplingFreq, 109 | startTime, 110 | analyzeStop, 111 | stepSize, 112 | speakerNumSteps, 113 | findLeft, 114 | ) 115 | 116 | if 
endTime > analyzeStop: 117 | endTime = analyzeStop 118 | 119 | if startTime != endTime: 120 | entry = (startTime, endTime) 121 | if findLeft: 122 | leftEntryList.append(entry) 123 | else: 124 | rightEntryList.append(entry) 125 | 126 | print("%f, %f, %f" % (startTime, endTime, analyzeStop)) 127 | startTime = endTime 128 | findLeft = not findLeft 129 | 130 | except common.EndOfAudioData: # Stop processing 131 | if analyzeStop - startTime > stepSize * speakerNumSteps: 132 | finalEntry = (startTime, analyzeStop) 133 | if findLeft: 134 | leftEntryList.append(finalEntry) 135 | else: 136 | rightEntryList.append(finalEntry) 137 | 138 | return leftEntryList, rightEntryList 139 | 140 | 141 | def autosegmentStereoAudio( 142 | leftSamples, 143 | rightSamples, 144 | samplingFreq, 145 | leftEntryList, 146 | rightEntryList, 147 | stepSize, 148 | speakerNumSteps, 149 | ): 150 | overlapThreshold = 0 151 | overlapCheck = lambda entry, entryList: [ 152 | not common.overlapCheck(entry, cmprEntry, overlapThreshold) 153 | for cmprEntry in entryList 154 | ] 155 | 156 | # First add all of the entries with no overlap 157 | newLeftEntryList = [] 158 | for leftEntry in leftEntryList: 159 | if all(overlapCheck(leftEntry, rightEntryList)): 160 | newLeftEntryList.append(leftEntry) 161 | 162 | newRightEntryList = [] 163 | for rightEntry in rightEntryList: 164 | if all(overlapCheck(rightEntry, leftEntryList)): 165 | newRightEntryList.append(rightEntry) 166 | 167 | # For all entries with overlap, split them by speaker 168 | # Utilizing the left channel as a base, this chunks through all overlapping 169 | # in a single pass of the left channel, until there are no more overlapping 170 | # segments between the right and left channels. 171 | i = 0 172 | while i < len(leftEntryList): 173 | # Check if there are any segments in the right channel that overlap 174 | # with the current segment in the left channel. If not, move to 175 | # the next segment. 
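# (E.g., hypothetically, left (0.0, 5.0) vs right (3.0, 8.0): only the
# shared span (3.0, 5.0) is re-assigned below by comparing channel
# energies, while (0.0, 3.0) and (5.0, 8.0) survive via cropUnusedPortion.)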
176 | leftEntry = leftEntryList[i] 177 | overlapCheckList = overlapCheck(leftEntry, rightEntryList) 178 | if all(overlapCheckList): 179 | i += 1 180 | continue 181 | 182 | # Otherwise, resolve the first segment in the right channel that 183 | # overlaps with the current segment 184 | leftEntry = leftEntryList.pop(i) 185 | 186 | j = overlapCheckList.index(False) # Find the first overlap 187 | rightEntry = rightEntryList.pop(j) 188 | 189 | entryTuple = assignAudioEventsForEntries( 190 | leftSamples, 191 | rightSamples, 192 | samplingFreq, 193 | leftEntry, 194 | rightEntry, 195 | stepSize, 196 | speakerNumSteps, 197 | ) 198 | tmpLeftEntryList, tmpRightEntryList = entryTuple 199 | 200 | leftEntryList[i:i] = tmpLeftEntryList 201 | rightEntryList[j:j] = tmpRightEntryList 202 | 203 | # Combine the original non-overlapping segments with the adjusted segments 204 | newLeftEntryList.extend(leftEntryList) 205 | newRightEntryList.extend(rightEntryList) 206 | 207 | newLeftEntryList.sort() 208 | newRightEntryList.sort() 209 | 210 | newLeftEntryList = [ 211 | entry 212 | for entry in newLeftEntryList 213 | if (entry[1] - entry[0] > stepSize * speakerNumSteps) 214 | ] 215 | newRightEntryList = [ 216 | entry 217 | for entry in newRightEntryList 218 | if (entry[1] - entry[0] > stepSize * speakerNumSteps) 219 | ] 220 | 221 | return newLeftEntryList, newRightEntryList 222 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/segment_stereo_speech_efficient.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Nov 4, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | from pyacoustics.speech_detection import common 8 | 9 | 10 | def findNextSpeaker( 11 | leftFN, 12 | rightFN, 13 | startTime, 14 | analyzeStop, 15 | stepSize, 16 | numSteps, 17 | findLeft=True, 18 | leftMin=None, 19 | leftMax=None, 20 | rightMin=None, 21 | rightMax=None, 22 | ): 23 | """""" 24 | 25 | audioTuple = common.openAudioFile(leftFN) 26 | leftAudioFile = audioTuple[0] 27 | framerate = audioTuple[2] 28 | rightAudioFile = common.openAudioFile(rightFN)[0] 29 | 30 | # Extract the audio frames 31 | i = 0 32 | currentSequenceNum = 0 33 | leftAudioFile.setpos(int(framerate * startTime)) 34 | rightAudioFile.setpos(int(framerate * startTime)) 35 | while currentSequenceNum < numSteps: 36 | # Stop analyzing once we've reached the end of this interval 37 | currentTime = startTime + i * stepSize 38 | 39 | if currentTime >= analyzeStop: 40 | raise common.EndOfAudioData() 41 | 42 | leftRMSEnergy = common.rmsNextFrames(leftAudioFile, stepSize, leftMin, leftMax) 43 | rightRMSEnergy = common.rmsNextFrames( 44 | rightAudioFile, stepSize, rightMin, rightMax 45 | ) 46 | 47 | if (findLeft is True and leftRMSEnergy >= rightRMSEnergy) or ( 48 | findLeft is False and leftRMSEnergy <= rightRMSEnergy 49 | ): 50 | currentSequenceNum += 1 51 | else: 52 | currentSequenceNum = 0 53 | i += 1 54 | 55 | endTime = startTime + (i - numSteps) * stepSize 56 | 57 | return endTime 58 | 59 | 60 | def assignAudioEventsForEntries( 61 | leftFN, 62 | rightFN, 63 | leftEntry, 64 | rightEntry, 65 | stepSize, 66 | speakerNumSteps, 67 | leftMin, 68 | leftMax, 69 | rightMin, 70 | rightMax, 71 | ): 72 | """ 73 | Start up and tear down function for assignAudioEvents() 74 | """ 75 | 76 | # Find the overlap interval and preserve the non-overlapped portions 77 | start = max(leftEntry[0], rightEntry[0]) 78 | stop = min(leftEntry[1], rightEntry[1]) 79 | 80 | leftEntryList = 
common.cropUnusedPortion(leftEntry, start, stop) 81 | rightEntryList = common.cropUnusedPortion(rightEntry, start, stop) 82 | 83 | # Determine who is speaking in overlapped portions 84 | tmpEntries = assignAudioEvents( 85 | leftFN, 86 | rightFN, 87 | start, 88 | stop, 89 | stepSize, 90 | speakerNumSteps, 91 | leftMin, 92 | leftMax, 93 | rightMin, 94 | rightMax, 95 | ) 96 | 97 | leftEntryList.extend(tmpEntries[0]) 98 | rightEntryList.extend(tmpEntries[1]) 99 | 100 | # Merge adjacent regions sharing a boundary, if any 101 | leftEntryList.sort() 102 | rightEntryList.sort() 103 | 104 | leftEntryList = common.mergeAdjacentEntries(leftEntryList) 105 | rightEntryList = common.mergeAdjacentEntries(rightEntryList) 106 | 107 | return leftEntryList, rightEntryList 108 | 109 | 110 | def assignAudioEvents( 111 | leftFN, 112 | rightFN, 113 | startTime, 114 | analyzeStop, 115 | stepSize, 116 | speakerNumSteps, 117 | leftMin, 118 | leftMax, 119 | rightMin, 120 | rightMax, 121 | ): 122 | findLeft = True 123 | leftEntryList = [] 124 | rightEntryList = [] 125 | try: 126 | while True: 127 | endTime = findNextSpeaker( 128 | leftFN, 129 | rightFN, 130 | startTime, 131 | analyzeStop, 132 | stepSize, 133 | speakerNumSteps, 134 | findLeft, 135 | leftMin, 136 | leftMax, 137 | rightMin, 138 | rightMax, 139 | ) 140 | 141 | if endTime > analyzeStop: 142 | endTime = analyzeStop 143 | 144 | if startTime != endTime: 145 | entry = (startTime, endTime) 146 | if findLeft: 147 | leftEntryList.append(entry) 148 | else: 149 | rightEntryList.append(entry) 150 | 151 | print("%f, %f, %f" % (startTime, endTime, analyzeStop)) 152 | startTime = endTime 153 | findLeft = not findLeft 154 | 155 | except common.EndOfAudioData: # Stop processing 156 | if analyzeStop - startTime > stepSize * speakerNumSteps: 157 | finalEntry = (startTime, analyzeStop) 158 | if findLeft: 159 | leftEntryList.append(finalEntry) 160 | else: 161 | rightEntryList.append(finalEntry) 162 | 163 | return leftEntryList, rightEntryList 164 | 165 | 166 | def autosegmentStereoAudio( 167 | leftFN, rightFN, leftEntryList, rightEntryList, stepSize, speakerNumSteps 168 | ): 169 | overlapThreshold = 0 170 | overlapCheck = lambda entry, entryList: [ 171 | not common.overlapCheck(entry, cmprEntry, overlapThreshold) 172 | for cmprEntry in entryList 173 | ] 174 | 175 | # Find the min and max intensity levels for normalizing later 176 | leftMin, leftMax = common.getMinMaxAmplitude(leftFN, stepSize, leftEntryList) 177 | rightMin, rightMax = common.getMinMaxAmplitude(rightFN, stepSize, rightEntryList) 178 | 179 | # First add all of the entries with no overlap 180 | newLeftEntryList = [] 181 | for leftEntry in leftEntryList: 182 | if all(overlapCheck(leftEntry, rightEntryList)): 183 | newLeftEntryList.append(leftEntry) 184 | 185 | newRightEntryList = [] 186 | for rightEntry in rightEntryList: 187 | if all(overlapCheck(rightEntry, leftEntryList)): 188 | newRightEntryList.append(rightEntry) 189 | 190 | # For all entries with overlap, split them by speaker 191 | # Utilizing the left channel as a base, this chunks through all overlapping 192 | # in a single pass of the left channel, until there are no more overlapping 193 | # segments between the right and left channels. 194 | i = 0 195 | while i < len(leftEntryList): 196 | # Check if there are any segments in the right channel that overlap 197 | # with the current segment in the left channel. If not, move to 198 | # the next segment. 
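                # Note that i is not incremented when an overlap is found:
                # after the overlapping pair is resolved, the sub-entries
                # spliced back in at position i are themselves re-checked
                # against the remaining right-channel entries on the next
                # pass through the loop.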
199 | leftEntry = leftEntryList[i] 200 | overlapCheckList = overlapCheck(leftEntry, rightEntryList) 201 | if all(overlapCheckList): 202 | i += 1 203 | continue 204 | 205 | # Otherwise, resolve the first segment in the right channel that 206 | # overlaps with the current segment 207 | leftEntry = leftEntryList.pop(i) 208 | 209 | j = overlapCheckList.index(False) # Find the first overlap 210 | rightEntry = rightEntryList.pop(j) 211 | 212 | entryTuple = assignAudioEventsForEntries( 213 | leftFN, 214 | rightFN, 215 | leftEntry, 216 | rightEntry, 217 | stepSize, 218 | speakerNumSteps, 219 | leftMin, 220 | leftMax, 221 | rightMin, 222 | rightMax, 223 | ) 224 | tmpLeftEntryList, tmpRightEntryList = entryTuple 225 | 226 | leftEntryList[i:i] = tmpLeftEntryList 227 | rightEntryList[j:j] = tmpRightEntryList 228 | 229 | # Combine the original non-overlapping segments with the adjusted segments 230 | newLeftEntryList.extend(leftEntryList) 231 | newRightEntryList.extend(rightEntryList) 232 | 233 | newLeftEntryList.sort() 234 | newRightEntryList.sort() 235 | 236 | newLeftEntryList = [ 237 | entry 238 | for entry in newLeftEntryList 239 | if (entry[1] - entry[0] > stepSize * speakerNumSteps) 240 | ] 241 | newRightEntryList = [ 242 | entry 243 | for entry in newRightEntryList 244 | if (entry[1] - entry[0] > stepSize * speakerNumSteps) 245 | ] 246 | 247 | return newLeftEntryList, newRightEntryList 248 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/split_on_tone.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Sep 6, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | import math 10 | 11 | from pyacoustics.signals import audio_scripts 12 | from pyacoustics.utilities import sequences 13 | 14 | BEEP = "beep" 15 | SILENCE = "silence" 16 | SPEECH = "speech" 17 | 18 | 19 | def _homogenizeList(dataList, toneFrequency): 20 | """ 21 | Discritizes pitch values into one of three categories 22 | """ 23 | 24 | minVal = min(dataList) 25 | 26 | retDataList = [] 27 | for val in dataList: 28 | if val == toneFrequency: 29 | val = BEEP 30 | elif val == minVal: 31 | val = SILENCE 32 | else: 33 | val = SPEECH 34 | retDataList.append(val) 35 | 36 | return retDataList 37 | 38 | 39 | def splitFileOnTone(pitchList, timeStep, toneFrequency, eventDurationThreshold): 40 | """ 41 | Splits files by pure tones 42 | """ 43 | toneFrequency = int(round(toneFrequency, -1)) 44 | 45 | roundedPitchList = [int(round(val, -1)) for val in pitchList] 46 | codedPitchList = _homogenizeList(roundedPitchList, toneFrequency) 47 | 48 | compressedList = sequences.compressList(codedPitchList) 49 | timeDict = sequences.compressedListTransform( 50 | compressedList, 1.0 / timeStep, eventDurationThreshold 51 | ) 52 | 53 | # Fill in with empty lists if it didn't appear in the dataset 54 | # (eg no beeps were detected or no speech occurred) 55 | for key in [BEEP, SPEECH, SILENCE]: 56 | if key not in timeDict: 57 | timeDict[key] = [] 58 | 59 | return timeDict 60 | 61 | 62 | def extractSubwavs(timeDict, path, fn, outputPath): 63 | """ 64 | Extracts segments between tones marked in the output of splitFileOnTone() 65 | """ 66 | name = os.path.splitext(fn)[0] 67 | 68 | duration = audio_scripts.getSoundFileDuration(join(path, fn)) 69 | beepEntryList = timeDict[BEEP] 70 | segmentEntryList = sequences.invertIntervalList(beepEntryList, 0, duration) 71 | 72 | if len(segmentEntryList) > 0: 73 | numZeroes = 
int(math.floor(math.log10(len(segmentEntryList)))) + 1 74 | else: 75 | numZeroes = 1 76 | 77 | strFmt = "%%s_%%0%dd.wav" % numZeroes # e.g. '%s_%02d.wav' 78 | 79 | for i, entry in enumerate(segmentEntryList): 80 | start, stop = entry[:2] 81 | 82 | audio_scripts.extractSubwav( 83 | join(path, fn), 84 | join(outputPath, strFmt % (name, i)), 85 | startT=float(start), 86 | endT=float(stop), 87 | singleChannelFlag=True, 88 | ) 89 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/textgrids.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Nov 5, 2014 3 | 4 | @author: tmahrt 5 | 6 | Textgrid utilities for saving the output of speech detection code into 7 | praat textgrids. 8 | """ 9 | 10 | from praatio import textgrid 11 | 12 | 13 | def outputTextgrid(outputFN, duration, entryList, tierName): 14 | # Give all entries a label indicating their order of occurrence 15 | entryList.sort() 16 | newEntryList = [(entry[0], entry[1], str(i)) for i, entry in enumerate(entryList)] 17 | 18 | # Output textgrid 19 | tierSpeech = textgrid.IntervalTier(tierName, newEntryList, 0, duration) 20 | 21 | tg = textgrid.Textgrid() 22 | tg.addTier(tierSpeech) 23 | tg.save(outputFN, format="short_textgrid", includeBlankSpaces=True) 24 | 25 | 26 | def outputStereoTextgrid( 27 | outputFN, duration, leftEntryList, rightEntryList, leftChannelName, rightChannelName 28 | ): 29 | # Give all entries a label indicating their order of occurrence 30 | leftEntryList.sort() 31 | newLeftEntryList = [ 32 | (entry[0], entry[1], str(i)) for i, entry in enumerate(leftEntryList) 33 | ] 34 | 35 | rightEntryList.sort() 36 | newRightEntryList = [ 37 | (entry[0], entry[1], str(i)) for i, entry in enumerate(rightEntryList) 38 | ] 39 | 40 | # This shouldn't be necessary 41 | newLeftEntryList = [ 42 | entry 43 | for entry in newLeftEntryList 44 | if entry[1] <= duration and entry[0] < entry[1] 45 | ] 46 | newRightEntryList = [ 47 | entry 48 | for entry in newRightEntryList 49 | if entry[1] <= duration and entry[0] < entry[1] 50 | ] 51 | 52 | # Output textgrid 53 | leftTier = textgrid.IntervalTier(leftChannelName, newLeftEntryList, 0, duration) 54 | rightTier = textgrid.IntervalTier(rightChannelName, newRightEntryList, 0, duration) 55 | 56 | outputTG = textgrid.Textgrid() 57 | outputTG.addTier(leftTier) 58 | outputTG.addTier(rightTier) 59 | 60 | outputTG.save(outputFN, format="short_textgrid", includeBlankSpaces=True) 61 | -------------------------------------------------------------------------------- /pyacoustics/speech_filters/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 27, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | -------------------------------------------------------------------------------- /pyacoustics/speech_filters/speech_shaped_noise.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Mar 18, 2016 3 | 4 | @author: timmahrt 5 | 6 | *Preface: I'm not an expert in noise. What I've written here is just how I 7 | (naively) understand this topic. 8 | 9 | The following code is used for generating speech-shaped noise and masking 10 | speech using it. Speech-shaped noise is white noise with the same spectral 11 | properties as speech. As individual people have different spectral qualities, 12 | speech shaped noise should ideally be generated for each individual. 
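In rough terms, the noise is made by taking the Fourier transform of the
concatenated speech, keeping the magnitude spectrum, replacing the phase
with random values, and inverting the transform (see _noise_from_signal()
below).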
13 | 14 | The process: 15 | - first, speech shaped noise is generated for a speaker's 16 | recordings via generateNoise() 17 | - second, the speaker's data is then masked using the generated noise 18 | via maskSpeech() 19 | 20 | The alternative process: 21 | - if the files used to generate the noise are the same files that need to 22 | be masked, use the convenience function batchMaskSpeakerData() 23 | 24 | Some guidelines: 25 | - at least 3 minutes of speech should be used. If less than that is used, 26 | the noise may contain harmonic components 27 | - silences can exist in the input 28 | 29 | Requires scipy and numpy 30 | 31 | See the bottom of this file for an example usage. 32 | """ 33 | 34 | import os 35 | from os.path import join 36 | 37 | import functools 38 | import wave 39 | 40 | import numpy as np 41 | from scipy.io import wavfile 42 | from scipy import signal 43 | from numpy import fft 44 | 45 | ########################### 46 | # start of pambox code 47 | # Copyright (c) 2014, Alexandre Chabot-Leclerc 48 | # See LICENSE file for more information 49 | ########################### 50 | 51 | 52 | def _dbspl(x, ac=False, offset=0.0): 53 | """Computes RMS value of signal in dB. 54 | 55 | By default, a signal with an RMS value of 1 will have a level of 0 dB 56 | SPL. 57 | 58 | Parameters 59 | ---------- 60 | x : array_like 61 | Signal for which to caculate the sound-pressure level. 62 | ac : bool 63 | Consider only the AC component of the signal, i.e. the mean is 64 | removed (Default value = False) 65 | offset : float 66 | Reference to convert between RMS and dB SPL. (Default value = 0.0) 67 | axis : int 68 | Axis on which to compute the SPL value (Default value = -1, last axis) 69 | 70 | Returns 71 | ------- 72 | ndarray 73 | Sound-pressure levels. 74 | 75 | References 76 | ---------- 77 | .. [1] Auditory Modeling Toolbox, Peter L. Soendergaard 78 | B. C. J. Moore. An Introduction to the Psychology of Hearing. Academic 79 | Press, 5th edition, 2003. 80 | 81 | See also 82 | -------- 83 | setdbspl 84 | rms 85 | """ 86 | x = np.asarray(x) 87 | return 20.0 * np.log10(_rms(x, ac)) + float(offset) 88 | 89 | 90 | def _read_wav_as_float(path): 91 | """Reads a wavefile as a float. 92 | Parameters 93 | ---------- 94 | path : string 95 | Path to the wave file. 96 | Returns 97 | ------- 98 | wav : ndarray 99 | """ 100 | _, signal = wavfile.read(path) 101 | if np.issubdtype(signal.dtype, np.integer): 102 | # Integer division here. The '1.0' converts the numbers to float. 103 | return signal.T / (1.0 * np.abs(np.iinfo(signal.dtype).min)) 104 | return signal.T 105 | 106 | 107 | def _write_wav(fname, fs, x, normalize=False): 108 | """Writes floating point numpy array to 16 bit wavfile. 109 | 110 | Convenience wrapper around the scipy.io.wavfile.write function. 111 | 112 | The '.wav' extension is added to the file if it is not part of the 113 | filename string. 114 | 115 | Inputs of type `np.float` are converted to `int16` before writing to file. 116 | 117 | Parameters 118 | ---------- 119 | fname : string 120 | Filename with path. 121 | fs : int 122 | Sampling frequency. 123 | x : array_like 124 | Signal with the shape N_channels x Length 125 | normalize : bool 126 | Scale the signal such that its maximum value is one. 
127 | 128 | Returns 129 | ------- 130 | None 131 | 132 | """ 133 | # Make sure that the channels are the second dimension 134 | fs = np.int(fs) 135 | if not fname.endswith(".wav"): 136 | fname += ".wav" 137 | 138 | if x.shape[0] <= 2: 139 | x = x.T 140 | 141 | if np.issubdtype(x.dtype, np.float) and normalize: 142 | scaled = x / np.max(np.abs(x)) * (2**15 - 1) 143 | elif np.issubdtype(x.dtype, np.float): 144 | scaled = x * (2**15 - 1) 145 | else: 146 | scaled = x 147 | wavfile.write(fname, fs, scaled.astype("int16")) 148 | 149 | 150 | def _rms(x, ac=False, axis=-1): 151 | """Calculates the RMS value of a signal. 152 | 153 | Parameters 154 | ---------- 155 | x : array_like 156 | Signal. 157 | ac : bool 158 | Consider only the AC component of the signal. (Default value = False) 159 | axis : 160 | Axis on which to calculate the RMS value. The default is to calculate 161 | the RMS on the last dimensions, i.e. axis = -1. 162 | 163 | Returns 164 | ------- 165 | ndarray 166 | RMS value of the signal. 167 | 168 | """ 169 | x = np.asarray(x) 170 | if ac: 171 | if x.ndim > 1 and axis == -1: 172 | x_mean = x.mean(axis=axis)[..., np.newaxis] 173 | else: 174 | x_mean = x.mean(axis=axis) 175 | return np.linalg.norm((x - x_mean) / np.sqrt(x.shape[axis]), axis=axis) 176 | else: 177 | return np.linalg.norm(x / np.sqrt(x.shape[axis]), axis=axis) 178 | 179 | 180 | def _mix_noise(clean, noise, sent_level, snr=None): 181 | """Mix a signal signal noise at a given signal-to-noise ratio. 182 | 183 | Parameters 184 | ---------- 185 | clean : ndarray 186 | Clean signal. 187 | noise : ndarray 188 | Noise signal. 189 | sent_level : float 190 | Sentence level, in dB SPL. 191 | snr : 192 | Signal-to-noise ratio at which to mix the signals, in dB. If snr is 193 | `None`, no noise is mixed with the signal (Default value = None) 194 | 195 | Returns 196 | ------- 197 | tuple of ndarrays 198 | Returns the clean signal, the mixture, and the noise. 199 | 200 | """ 201 | 202 | # Pick a random section of the noise 203 | n_clean = len(clean) 204 | n_noise = len(noise) 205 | if n_noise > n_clean: 206 | start_idx = np.random.randint(n_noise - n_clean) 207 | noise = noise[start_idx : start_idx + n_clean] 208 | 209 | if snr is not None: 210 | # Get speech level and set noise level accordingly 211 | # clean_level = utils.dbspl(clean) 212 | # noise = utils.setdbspl(noise, clean_level - snr) 213 | noise = noise / _rms(noise) * 10 ** ((sent_level - snr) / 20) 214 | mix = clean + noise 215 | else: 216 | mix = clean 217 | 218 | return clean, mix, noise 219 | 220 | 221 | def _noise_from_signal(x, fs=40000, keep_env=False): 222 | """Create a noise with same spectrum as the input signal. 223 | 224 | Parameters 225 | ---------- 226 | x : array_like 227 | Input signal. 228 | fs : int 229 | Sampling frequency of the input signal. (Default value = 40000) 230 | keep_env : bool 231 | Apply the envelope of the original signal to the noise. (Default 232 | value = False) 233 | 234 | Returns 235 | ------- 236 | ndarray 237 | Noise signal. 238 | 239 | """ 240 | x = np.asarray(x) 241 | n_x = x.shape[-1] 242 | n_fft = next_pow_2(n_x) 243 | X = fft.rfft(x, next_pow_2(n_fft)) 244 | # Randomize phase. 
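    # Keep the magnitude spectrum but draw each phase uniformly at random,
    # i.e. N(f) = |X(f)| * exp(2*pi*j*u) with u ~ U[0, 1), so the noise has
    # (roughly) the same long-term spectrum as x but none of its temporal
    # structure. (Despite its name, noise_mag below holds the full complex
    # spectrum, not just the magnitude.)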
245 | noise_mag = np.abs(X) * np.exp(2 * np.pi * 1j * np.random.random(X.shape[-1])) 246 | noise = np.real(fft.irfft(noise_mag, n_fft)) 247 | out = noise[:n_x] 248 | 249 | if keep_env: 250 | env = np.abs(signal.hilbert(x)) 251 | [bb, aa] = signal.butter(6, 50 / (fs / 2)) # 50 Hz LP filter 252 | env = signal.filtfilt(bb, aa, env) 253 | out *= env 254 | 255 | return out 256 | 257 | 258 | def next_pow_2(x): 259 | """Calculates the next power of 2 of a number.""" 260 | return int(pow(2, np.ceil(np.log2(x)))) 261 | 262 | 263 | ########################### 264 | # end of pambox code 265 | ########################### 266 | 267 | 268 | class NotListException(Exception): 269 | def __str__(self): 270 | return "Error. First argument must be a list of file names." 271 | 272 | 273 | class InconsistentFramerateException(Exception): 274 | def __init__(self, wavFNList, framerateList): 275 | super(InconsistentFramerateException, self).__init__() 276 | 277 | self.framerateDict = {} 278 | 279 | framerateSet = list(set(framerateList)) 280 | for framerate in framerateSet: 281 | self.framerateDict[framerate] = [] 282 | 283 | for wavFN, framerate in zip(wavFNList, framerateList): 284 | self.framerateDict[framerate].append(wavFN) 285 | 286 | def __str__(self): 287 | outputStr = "Error. All wave files must have the same framerate" 288 | 289 | for framerate, fnList in self.framerateDict.items(): 290 | outputStr += "\n%s: %s" % (framerate, repr(fnList)) 291 | 292 | return outputStr 293 | 294 | 295 | def _getFramerate(wavFN): 296 | audiofile = wave.open(wavFN, "r") 297 | params = audiofile.getparams() 298 | 299 | return params[2] 300 | 301 | 302 | def _getDuration(waveFN): 303 | """ 304 | Returns the duration of a wav file (in seconds) 305 | """ 306 | audiofile = wave.open(waveFN, "r") 307 | 308 | params = audiofile.getparams() 309 | framerate = params[2] 310 | nframes = params[3] 311 | 312 | duration = float(nframes) / framerate 313 | return duration 314 | 315 | 316 | def _getMatchFunc(pattern): 317 | """ 318 | An unsophisticated pattern matching function 319 | """ 320 | 321 | # '#' Marks word boundaries, so if there is more than one we need to do 322 | # something special to make sure we're not mis-representings them 323 | assert pattern.count("#") < 2 324 | 325 | def startsWith(subStr, fullStr): 326 | return fullStr[: len(subStr)] == subStr 327 | 328 | def endsWith(subStr, fullStr): 329 | return fullStr[-1 * len(subStr) :] == subStr 330 | 331 | def inStr(subStr, fullStr): 332 | return subStr in fullStr 333 | 334 | # Selection of the correct function 335 | if pattern[0] == "#": 336 | pattern = pattern[1:] 337 | cmpFunc = startsWith 338 | 339 | elif pattern[-1] == "#": 340 | pattern = pattern[:-1] 341 | cmpFunc = endsWith 342 | 343 | else: 344 | cmpFunc = inStr 345 | 346 | return functools.partial(cmpFunc, pattern) 347 | 348 | 349 | def findFiles( 350 | path, 351 | filterPaths=False, 352 | filterExt=None, 353 | filterPattern=None, 354 | skipIfNameInList=None, 355 | stripExt=False, 356 | addPath=False, 357 | ): 358 | """ 359 | The primary use is to find files in a folder spoken by the same speaker 360 | 361 | Feed the input of findFiles into generateSpeechShapedNoise() as the first 362 | argument. 
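
    A sketch of typical use with generateNoise(), defined below
    (hypothetical paths):

        >>> wavFNList = findFiles('/home/data/speaker1', filterExt='.wav',
        ...                       addPath=True)
        >>> generateNoise(wavFNList, '/home/data/noise/speaker1_ssn.wav')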
363 |     """
364 |     fnList = os.listdir(path)
365 | 
366 |     if filterPaths is True:
367 |         fnList = [
368 |             folderName
369 |             for folderName in fnList
370 |             if os.path.isdir(os.path.join(path, folderName))
371 |         ]
372 | 
373 |     if filterExt is not None:
374 |         splitFNList = [
375 |             [
376 |                 fn,
377 |             ]
378 |             + list(os.path.splitext(fn))
379 |             for fn in fnList
380 |         ]
381 |         fnList = [fn for fn, name, ext in splitFNList if ext == filterExt]
382 | 
383 |     if filterPattern is not None:
384 |         splitFNList = [
385 |             [
386 |                 fn,
387 |             ]
388 |             + list(os.path.splitext(fn))
389 |             for fn in fnList
390 |         ]
391 |         matchFunc = _getMatchFunc(filterPattern)
392 |         fnList = [fn for fn, name, ext in splitFNList if matchFunc(name)]
393 | 
394 |     if skipIfNameInList is not None:
395 |         targetNameList = [os.path.splitext(fn)[0] for fn in skipIfNameInList]
396 |         fnList = [fn for fn in fnList if os.path.splitext(fn)[0] not in targetNameList]
397 | 
398 |     if stripExt is True:
399 |         fnList = [os.path.splitext(fn)[0] for fn in fnList]
400 | 
401 |     if addPath is True:
402 |         fnList = [join(path, fn) for fn in fnList]
403 | 
404 |     fnList.sort()
405 |     return fnList
406 | 
407 | 
408 | def generateNoise(inputFNList, outputFN, outputDuration=None):
409 |     """
410 |     Generates a file of random noise with the same spectrum as the input
411 | 
412 |     The input should contain at least 3 minutes of speech for best results.
413 |     Silences can exist within the speech. Multiple files can be considered
414 |     for one speech shaped noise generation.
415 | 
416 |     With less than 3 minutes, the speech shaped noise might contain
417 |     harmonic components.
418 | 
419 |     The output will have the same duration as the input, but if you don't need
420 |     such a long file, you can truncate the output.
421 |     """
422 | 
423 |     # Input must be a list
424 |     if not isinstance(inputFNList, list):
425 |         raise NotListException()
426 | 
427 |     # Verify that all files have the same framerate
428 |     framerateList = [_getFramerate(fn) for fn in inputFNList]
429 |     framerate = framerateList[0]
430 |     if not all([tmpFramerate == framerate for tmpFramerate in framerateList]):
431 |         raise InconsistentFramerateException(inputFNList, framerateList)
432 | 
433 |     outputPath = os.path.split(outputFN)[0]
434 |     if not os.path.exists(outputPath):
435 |         os.mkdir(outputPath)
436 | 
437 |     # Append the frames across all audio files
438 |     audioFrames = []
439 |     for fn in inputFNList:
440 |         audioFrames.extend(_read_wav_as_float(fn))
441 | 
442 |     # Get the speech shaped noise
443 |     # I'm not sure what the third argument does, but setting it
444 |     # to True makes the output sound horrible in my experience.
445 |     noiseFrames = _noise_from_signal(audioFrames, framerate, False)
446 | 
447 |     # Crop the file if specified by parameter /outputDuration/
448 |     if outputDuration is not None:
449 |         duration = len(noiseFrames) / framerate
450 |         if duration < outputDuration:
451 |             errMsg = (
452 |                 "Duration shorter than requested for file '%s'. Not cropping output."
453 |             )
454 |             print(errMsg % outputFN)
455 |         else:
456 |             noiseFrames = noiseFrames[: outputDuration * framerate]
457 | 
458 |     _write_wav(outputFN, framerate, noiseFrames, True)
459 | 
460 | 
461 | def maskSpeech(inputFN, noiseFN, outputFN, snr):
462 |     """
463 |     Mask the input file with the noise file at level snr (dB).
464 | 
465 |     The noise file can be generated with generateNoise()
466 | 
467 |     Interesting snr values, which increasingly distort the speech,
468 |     range from 3 down to -11. See Aubanel et al 2014 for more information.
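
    A sketch (hypothetical paths):

        >>> maskSpeech('/home/data/speech/utt01.wav',
        ...            '/home/data/noise/speaker1_ssn.wav',
        ...            '/home/data/masked/utt01.wav', snr=-3)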
469 |     """
470 | 
471 |     outputPath = os.path.split(outputFN)[0]
472 |     if not os.path.exists(outputPath):
473 |         os.mkdir(outputPath)
474 | 
475 |     audioFrames = _read_wav_as_float(inputFN)
476 |     noiseFrames = _read_wav_as_float(noiseFN)
477 |     clean_level = _dbspl(audioFrames)
478 |     framerate = _getFramerate(inputFN)
479 |     noiseFramerate = _getFramerate(noiseFN)
480 | 
481 |     if framerate != noiseFramerate:
482 |         raise InconsistentFramerateException([inputFN, noiseFN], [framerate, noiseFramerate])
483 | 
484 |     outputFrames = _mix_noise(audioFrames[:], noiseFrames[:], clean_level, snr)[1]
485 | 
486 |     print(outputFN)
487 |     _write_wav(outputFN, framerate, outputFrames, True)
488 | 
489 | 
490 | def batchMaskSpeakerData(
491 |     fnList, noiseProfileFN, outputPath, snrList, regenerateNoiseProfile=True
492 | ):
493 |     """
494 |     Given a set of speech from a single speaker, mask each file with noise
495 | 
496 |     Create the speech shaped noise by combining all the speech files.
497 | 
498 |     This is a convenience function that combines the functionality of
499 |     generateNoise() and maskSpeech()
500 |     """
501 | 
502 |     if not os.path.exists(outputPath):
503 |         os.mkdir(outputPath)
504 | 
505 |     # Generate the noise profile
506 |     if regenerateNoiseProfile is True or not os.path.exists(noiseProfileFN):
507 |         generateNoise(fnList, noiseProfileFN)
508 | 
509 |     # Mask the speech files
510 |     for snr in snrList:
511 |         snrOutputPath = join(outputPath, repr(snr))
512 |         if not os.path.exists(snrOutputPath):
513 |             os.mkdir(snrOutputPath)
514 | 
515 |         for fnFullPath in fnList:
516 |             fn = os.path.split(fnFullPath)[1]
517 |             maskSpeech(fnFullPath, noiseProfileFN, join(snrOutputPath, fn), snr)
518 | 
519 | 
520 | if __name__ == "__main__":
521 |     # Example usage
522 |     _inputPath = r"C:\Users\Tim\Desktop\cleaned_wavs"
523 | 
524 |     _noiseFN = r"C:\Users\Tim\Desktop\noise_profiles\amelia_ssn.wav"
525 |     _outputPath = r"C:\Users\Tim\Desktop\noise_filtered_speech"
526 | 
527 |     # You can easily filter each audio file with different snrs by using this
528 |     # list. Each will be output to an appropriately labeled subfolder of
529 |     # the output path
530 |     _snrList = [
531 |         -3,
532 |     ]
533 | 
534 |     # You can manually create a list or use this search function to find
535 |     # all of the files produced by the same speaker which you want to
536 |     # create a speech shaped noise for and which you subsequently want
537 |     # to mask using that noise.
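    # For reference, _mix_noise() scales the noise so that its level sits
    # snr dB below the speech level before the two signals are summed:
    #     noise *= 10 ** ((clean_level - snr) / 20.0) / rms(noise)
    # so an snr of -3 leaves the noise 3 dB louder than the speech.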
538 | _fnList = findFiles(_inputPath, filterExt=".wav", addPath=True) 539 | batchMaskSpeakerData(_fnList, _noiseFN, _outputPath, _snrList) 540 | -------------------------------------------------------------------------------- /pyacoustics/speech_rate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/speech_rate/__init__.py -------------------------------------------------------------------------------- /pyacoustics/speech_rate/dictionary_estimate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jan 28, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | 10 | from pyacoustics.utilities import utils 11 | from pysle import isletool 12 | 13 | 14 | def percentInside(startTime, endTime, cmprStartTime, cmprEndTime): 15 | if float(startTime) <= float(cmprEndTime) and float(endTime) >= float( 16 | cmprStartTime 17 | ): 18 | leftEdge = cmprStartTime - startTime 19 | rightEdge = endTime - cmprEndTime 20 | 21 | if leftEdge < 0: 22 | leftEdge = 0 23 | if rightEdge < 0: 24 | rightEdge = 0 25 | 26 | retVal = 1 - ((rightEdge + leftEdge)) / (endTime - startTime) 27 | 28 | # No overlap 29 | else: 30 | retVal = 0 31 | 32 | return retVal 33 | 34 | 35 | def manualPhoneCount(tgInfoPath, isleFN, outputPath, skipList=None): 36 | if skipList is None: 37 | skipList = [] 38 | 39 | utils.makeDir(outputPath) 40 | 41 | isleDict = isletool.LexicalTool(isleFN) 42 | 43 | existFNList = utils.findFiles(outputPath, filterPaths=".txt") 44 | for fn in utils.findFiles( 45 | tgInfoPath, filterExt=".txt", skipIfNameInList=existFNList 46 | ): 47 | if os.path.exists(join(outputPath, fn)): 48 | continue 49 | print(fn) 50 | 51 | dataList = utils.openCSV(tgInfoPath, fn) 52 | dataList = [row[2] for row in dataList] # start, stop, tmpLabel 53 | outputList = [] 54 | for tmpLabel in dataList: 55 | if tmpLabel not in skipList: 56 | syllableCount, phoneCount = isletool.getNumPhones( 57 | isleDict, tmpLabel, maxFlag=True 58 | ) 59 | else: 60 | syllableCount, phoneCount = 0, 0 61 | 62 | outputList.append("%d,%d" % (syllableCount, phoneCount)) 63 | 64 | outputTxt = "\n".join(outputList) 65 | 66 | with open(join(outputPath, fn), "w") as fd: 67 | fd.write(outputTxt) 68 | 69 | 70 | def manualPhoneCountForEpochs(manualCountsPath, tgInfoPath, epochPath, outputPath): 71 | utils.makeDir(outputPath) 72 | 73 | skipList = utils.findFiles(outputPath, filterExt=".txt") 74 | for fn in utils.findFiles(tgInfoPath, filterExt=".txt", skipIfNameInList=skipList): 75 | epochList = utils.openCSV(epochPath, fn) 76 | tgInfo = utils.openCSV(tgInfoPath, fn) 77 | manualCounts = utils.openCSV(manualCountsPath, fn) 78 | 79 | epochOutputList = [] 80 | for epochTuple in epochList: # Epoch num, start, stop 81 | epochStart, epochStop = float(epochTuple[1]), float(epochTuple[2]) 82 | 83 | # Find all of the intervals that are at least partially 84 | # contained within the current epoch 85 | epochSyllableCount = 0 86 | epochPhoneCount = 0 87 | speechDuration = 0 88 | for info, counts in utils.safeZip( 89 | [tgInfo, manualCounts], enforceLength=True 90 | ): 91 | start, stop = float(info[0]), float(info[1]) 92 | syllableCount, phoneCount = float(counts[0]), float(counts[1]) 93 | 94 | # Accounts for intervals that straddle an epoch boundary 95 | multiplicationFactor = percentInside(start, stop, epochStart, epochStop) 96 | 97 | 
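            # percentInside() gives the fraction of [start, stop] lying
            # inside the epoch, so an interval that is half inside the
            # epoch contributes half of its syllables, phones, and
            # duration to that epoch's totals.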
speechDuration += (stop - start) * multiplicationFactor 98 | 99 | epochSyllableCount += syllableCount * multiplicationFactor 100 | epochPhoneCount += phoneCount * multiplicationFactor 101 | 102 | epochOutputList.append( 103 | "%f,%f,%f" % (epochSyllableCount, epochPhoneCount, speechDuration) 104 | ) 105 | 106 | with open(join(outputPath, fn), "w") as fd: 107 | fd.write("\n".join(epochOutputList)) 108 | -------------------------------------------------------------------------------- /pyacoustics/speech_rate/uwe_sr.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on July 28, 2015 3 | 4 | @author: tmahrt 5 | 6 | This code estimates the speech rate of a speaker by using Uwe Reichel's matlab 7 | script for detecting syllable nuclei over some interval. 8 | """ 9 | 10 | from os.path import join 11 | 12 | from pyacoustics.utilities import utils 13 | from pyacoustics.utilities import matlab 14 | 15 | 16 | def findSyllableNuclei( 17 | inputPath, outputPath, matlabEXE, matlabScriptsPath, printCmd=False 18 | ): 19 | """ 20 | Makes a file listing the syllable nuclei for each file in inputPath 21 | """ 22 | utils.makeDir(outputPath) 23 | 24 | pathList = [matlabScriptsPath, join(matlabScriptsPath, "nucleus_detection_matlab")] 25 | cmd = "detect_syllable_nuclei('%s', '%s');" % (inputPath, outputPath) 26 | matlab.runMatlabFunction(cmd, matlabEXE, pathList, printCmd) 27 | 28 | 29 | def toAbsoluteTime(namePrefix, matlabOutputPath, startTimeList): 30 | """ 31 | Converts the sampled times from relative to absolute time 32 | 33 | The input may be split across a number of files. This script assumes 34 | that files of the pattern <><>.txt correspond 35 | to different parts of the same source file. 36 | 37 | namePrefix - name of the original wav file with no suffix 38 | speechRatePath - the path where the output of the matlab script is placed 39 | startTimeList - there needs to be one file here for each file in 40 | speechRatePath with the pattern namePrefix 41 | 42 | Returns a list of lists where each sublist corresponds to the output of 43 | one file matching <> 44 | """ 45 | # Load subset speech rate 46 | speechRateFNList = utils.findFiles( 47 | matlabOutputPath, filterExt=".txt", filterPattern=namePrefix 48 | ) 49 | 50 | returnList = [] 51 | for start, speechRateFN in utils.safeZip( 52 | [startTimeList, speechRateFNList], enforceLength=True 53 | ): 54 | speechRateList = utils.openCSV(matlabOutputPath, speechRateFN, valueIndex=0) 55 | speechRateList = [value for value in speechRateList if value != ""] 56 | speechRateList = [ 57 | str(float(start) + float(sampNum)) for sampNum in speechRateList 58 | ] 59 | 60 | returnList.append(speechRateList) 61 | 62 | return returnList 63 | 64 | 65 | def uweSyllableCountForInterval(startTime, stopTime, nucleiCenterList): 66 | countList = [ 67 | timestamp 68 | for timestamp in nucleiCenterList 69 | if timestamp >= startTime and timestamp <= stopTime 70 | ] 71 | 72 | return len(countList) 73 | -------------------------------------------------------------------------------- /pyacoustics/text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/text/__init__.py -------------------------------------------------------------------------------- /pyacoustics/text/frequency.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 
2 | 3 | from os.path import join 4 | from itertools import islice 5 | 6 | try: 7 | from itertools import izip as zip 8 | except: 9 | pass 10 | import math 11 | import io 12 | 13 | from pyacoustics.utilities import utils 14 | 15 | 16 | class CountCorpus(object): 17 | def __init__(self, frequencyDict, totalCount=None): 18 | """ 19 | A generic class for handling corpora. 20 | 21 | For large corpora you can save the totalCount somewhere and pass it 22 | in during instantiation. Otherwise, it will be calculated at 23 | runtime. 24 | """ 25 | self.frequencyDict = frequencyDict 26 | 27 | if totalCount is None: 28 | totalCount = self._getNumWords() 29 | self.totalCount = totalCount 30 | 31 | def getFrequency(self, word, normFunc=None, outOfDictionaryValue=None): 32 | try: 33 | count = self.frequencyDict[word] 34 | except KeyError: 35 | if outOfDictionaryValue is None: 36 | raise 37 | else: 38 | print("OOD Word: %s" % word) 39 | count = outOfDictionaryValue 40 | 41 | try: 42 | if normFunc is None: 43 | freq = float(count) / self.totalCount 44 | else: 45 | freq = normFunc(count, self.totalCount) 46 | 47 | logFreq = math.log(float(count)) 48 | except ValueError: 49 | freq = "" 50 | logFreq = "" 51 | 52 | return count, freq, logFreq 53 | 54 | def _getNumWords(self): 55 | """ 56 | Gets the number of words in the corpus 57 | """ 58 | sumV = 0 59 | for word in self.frequencyDict.keys(): 60 | sumV += self.frequencyDict[word] 61 | 62 | return sumV 63 | 64 | 65 | class GoogleUnigram(CountCorpus): 66 | NUM_WORDS = 1024908267229.0 67 | 68 | def __init__(self, googleUnigram): 69 | # Load the corpus data 70 | frequencyDict = {} 71 | with open(googleUnigram, "r") as fd: 72 | data = fd.read() 73 | dataList = data.split() 74 | for word, count in zip( 75 | islice(dataList, 0, None, 2), islice(dataList, 1, None, 2) 76 | ): 77 | frequencyDict[word] = count 78 | 79 | super(GoogleUnigram, self).__init__(frequencyDict, GoogleUnigram.NUM_WORDS) 80 | 81 | 82 | class Switchboard(CountCorpus): 83 | NUM_WORDS = 1456224.0 84 | 85 | def __init__(self, switchboardCounts): 86 | # Load the corpus 87 | frequencyDict = {} 88 | with open(switchboardCounts, "r") as fd: 89 | data = fd.read() 90 | 91 | dataList = data.split("\n") 92 | dataList = [ 93 | row[1:-2].strip() for row in dataList if len(row) > 2 and row[0] != ";" 94 | ] 95 | dataList = [row.split(" ") for row in dataList] 96 | 97 | for row in dataList: 98 | word = row[0] 99 | count = row[-4] 100 | frequencyDict[word] = int(count) 101 | 102 | super(Switchboard, self).__init__(frequencyDict, Switchboard.NUM_WORDS) 103 | 104 | 105 | class SwitchboardTim(CountCorpus): 106 | NUM_WORDS = 1464017.0 107 | 108 | def __init__(self, switchboardCounts): 109 | frequencyDict = loadCountList(switchboardCounts) 110 | super(SwitchboardTim, self).__init__(frequencyDict, SwitchboardTim.NUM_WORDS) 111 | 112 | 113 | class Buckeye(CountCorpus): 114 | NUM_WORDS = 282575.0 # Not including words that start with '[' 115 | 116 | def __init__(self, buckeyeCounts): 117 | frequencyDict = loadCountList(buckeyeCounts) 118 | super(Buckeye, self).__init__(frequencyDict, Buckeye.NUM_WORDS) 119 | 120 | 121 | class Fischer(CountCorpus): 122 | NUM_WORDS = 21025946.0 123 | 124 | def __init__(self, fischerCounts): 125 | frequencyDict = loadCountList(fischerCounts) 126 | super(Fischer, self).__init__(frequencyDict, Fischer.NUM_WORDS) 127 | 128 | 129 | class Crea(CountCorpus): 130 | NUM_WORDS = 152554665 131 | 132 | def __init__(self, creaCounts): 133 | frequencyDict = loadCountList(creaCounts) 134 | super(Crea, 
self).__init__(frequencyDict, Crea.NUM_WORDS) 135 | 136 | 137 | class FrenchCorpus(CountCorpus): 138 | NUM_WORDS = None 139 | 140 | def __init__(self, frenchCounts): 141 | frequencyDict = loadCountList(frenchCounts) 142 | super(FrenchCorpus, self).__init__(frequencyDict, 0) 143 | 144 | 145 | def calcWordsPerMillion(count, totalCount): 146 | million = 1000000 147 | assert totalCount > million 148 | return count * million / totalCount 149 | 150 | 151 | def loadFrenchList(fnFullPath, outputFullPath): 152 | with io.open(fnFullPath, "r", encoding="utf-8") as fd: 153 | data = fd.read() 154 | frequencyDict = {} 155 | 156 | dataList = data.splitlines() 157 | dataList = [row.rsplit(",") for row in dataList[1:]] 158 | dataList = [(rowList[0], float(rowList[6])) for rowList in dataList] 159 | 160 | # Some items appear multiple times but with different meanings 161 | countList = [dataList.pop(0)] 162 | for word, count in dataList: 163 | if word == countList[-1][0]: 164 | countList[-1] = (word, countList[-1][1] + count) 165 | else: 166 | countList.append((word, count)) 167 | 168 | countList = [",".join((word, str(count))) for word, count in countList] 169 | 170 | with io.open(outputFullPath, "w", encoding="utf-8") as fd: 171 | fd.write("\n".join(countList)) 172 | 173 | 174 | def loadCountList(fnFullPath): 175 | """ 176 | Loads counts from file that stores word counts in the form "word, count\n" 177 | """ 178 | with io.open(fnFullPath, "r", encoding="utf-8") as fd: 179 | data = fd.read() 180 | frequencyDict = {} 181 | 182 | dataList = data.split("\n") 183 | dataList = [row.rsplit(",", 1) for row in dataList] 184 | 185 | for word, count in dataList: 186 | frequencyDict[word] = float(count) 187 | 188 | return frequencyDict 189 | 190 | 191 | def findFrequenciesForWordLists(featurePath, countObj, frequencyNormFunc): 192 | frequencyPath = join(featurePath, "frequency") 193 | utils.makeDir(frequencyPath) 194 | 195 | wordsPath = join(featurePath, "words") 196 | 197 | for fn in utils.findFiles(wordsPath): 198 | wordList = utils.openCSV(wordsPath, fn, valueIndex=0, encoding="utf-8") 199 | countList = [] 200 | for word in wordList: 201 | tmp = countObj.getFrequency(word, frequencyNormFunc, outOfDictionaryValue=1) 202 | count, freq, logFreq = tmp 203 | countList.append("%f,%f,%f" % (count, freq, logFreq)) 204 | 205 | with open(join(frequencyPath, fn), "w") as fd: 206 | fd.write("\n".join(countList)) 207 | -------------------------------------------------------------------------------- /pyacoustics/text/transcript.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 20, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | from os.path import join 8 | 9 | import io 10 | 11 | from pyacoustics.utilities import utils 12 | 13 | 14 | def toWords(featurePath, outputPath): 15 | utils.makeDir(outputPath) 16 | 17 | transcriptPath = join(featurePath, "txt") 18 | 19 | for fn in utils.findFiles(transcriptPath, filterExt=".txt"): 20 | fnFullPath = join(transcriptPath, fn) 21 | with io.open(fnFullPath, "r", encoding="utf-8") as fd: 22 | data = fd.read() 23 | dataList = data.split() 24 | 25 | with io.open(join(outputPath, fn), "w", encoding="utf-8") as fd: 26 | fd.write("\n".join(dataList)) 27 | -------------------------------------------------------------------------------- /pyacoustics/textgrids/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/textgrids/__init__.py -------------------------------------------------------------------------------- /pyacoustics/textgrids/syllabify_textgrids.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 22, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | 10 | from praatio import textgrid 11 | from pysle import isletool 12 | from pysle import praattools 13 | 14 | 15 | from pyacoustics.utilities import utils 16 | 17 | 18 | def correctTextgridTimes(tgPath, threshold): 19 | # Are x and y unique but very very similar 20 | withinThreshold = lambda x, y: (abs(x - y) < threshold) and (x != y) 21 | 22 | outputPath = join(tgPath, "correctsTGs") 23 | utils.makeDir(outputPath) 24 | 25 | for fn in utils.findFiles(tgPath, filterExt=".TextGrid"): 26 | print(fn) 27 | tg = textgrid.openTextgrid(join(tgPath, fn), includeEmptyIntervals=False) 28 | wordTier = tg.tierDict["words"] 29 | phoneTier = tg.tierDict["phones"] 30 | 31 | for wordEntry in wordTier.entryList: 32 | for i, phoneEntry in enumerate(phoneTier.entryList): 33 | if textgrid.intervalOverlapCheck(wordEntry, phoneEntry): 34 | start = phoneEntry[0] 35 | end = phoneEntry[1] 36 | phone = phoneEntry[2] 37 | 38 | if withinThreshold(wordEntry[0], start): 39 | start = wordEntry[0] 40 | elif withinThreshold(wordEntry[1], start): 41 | start = wordEntry[1] 42 | elif withinThreshold(wordEntry[0], end): 43 | end = wordEntry[0] 44 | elif withinThreshold(wordEntry[1], end): 45 | end = wordEntry[1] 46 | 47 | phoneTier.entryList[i] = (start, end, phone) 48 | 49 | tg.save(join(outputPath, fn), format="short_textgrid", includeBlankSpaces=True) 50 | 51 | 52 | def syllabifyTextgrids(tgPath, islePath): 53 | isleDict = isletool.LexicalTool(islePath) 54 | 55 | outputPath = join(tgPath, "syllabifiedTGs") 56 | utils.makeDir(outputPath) 57 | skipLabelList = ["", "xx", "", "{B_TRANS}", "{E_TRANS}"] 58 | 59 | for fn in utils.findFiles(tgPath, filterExt=".TextGrid"): 60 | if os.path.exists(join(outputPath, fn)): 61 | continue 62 | 63 | tg = textgrid.openTextgrid(join(tgPath, fn)) 64 | 65 | syllableTG = praattools.syllabifyTextgrid( 66 | isleDict, tg, "words", "phones", skipLabelList=skipLabelList 67 | ) 68 | 69 | outputTG = textgrid.Textgrid() 70 | outputTG.addTier(tg.tierDict["words"]) 71 | outputTG.addTier(tg.tierDict["phones"]) 72 | # outputTG.addTier(syllableTG.tierDict["syllable"]) 73 | outputTG.addTier(syllableTG.tierDict["tonic"]) 74 | 75 | outputTG.save( 76 | join(outputPath, fn), format="short_textgrid", includeBlankSpaces=True 77 | ) 78 | 79 | 80 | if __name__ == "__main__": 81 | tmpISLEPath = "/Users/tmahrt/Dropbox/workspace/pysle/test/islev2.txt" 82 | # correctTextgridTimes(tgPath, 0.0025) 83 | 84 | tmpTGPath = join( 85 | "/Users/tmahrt/Desktop/experiments/LMEDS_studies", 86 | "RPT_English/features/tobi_textgrids/correctsTGs", 87 | ) 88 | syllabifyTextgrids(tmpTGPath, tmpISLEPath) 89 | -------------------------------------------------------------------------------- /pyacoustics/textgrids/textgrids.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 20, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | 10 | import io 11 | 12 | from praatio import textgrid 13 | 14 | from pyacoustics.utilities import utils 15 | 16 | 17 | def _navigateTGs(tgPath, name, tierName): 18 | """ 
19 | Converts a textgrid into a plain text format 20 | 21 | Each labels is output by the 22 | """ 23 | 24 | tg = textgrid.openTextgrid(join(tgPath, name + ".TextGrid")) 25 | tier = tg.tierDict[tierName] 26 | 27 | for start, stop, label in tier.entryList: 28 | if label.strip() == "": 29 | continue 30 | 31 | yield start, stop, label 32 | 33 | 34 | def extractTGInfo(inputPath, outputPath, tierName): 35 | utils.makeDir(outputPath) 36 | 37 | for name in utils.findFiles(inputPath, filterExt=".TextGrid", stripExt=True): 38 | if os.path.exists(join(outputPath, name + ".txt")): 39 | continue 40 | print(name) 41 | 42 | outputList = [] 43 | for start, stop, label in _navigateTGs(inputPath, name, tierName): 44 | outputList.append("%f,%f,%s" % (start, stop, label)) 45 | 46 | outputTxt = "\n".join(outputList) 47 | outputFN = join(outputPath, name + ".txt") 48 | with io.open(outputFN, "w", encoding="utf-8") as fd: 49 | fd.write(outputTxt) 50 | 51 | 52 | def extractTranscript(featurePath, tierName): 53 | """ 54 | Outputs each label of a textgrid on a separate line in a plain text file 55 | """ 56 | 57 | tgPath = join(featurePath, "textgrids") 58 | 59 | outputPath = join(featurePath, "transcript") 60 | utils.makeDir(outputPath) 61 | 62 | for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True): 63 | outputList = [] 64 | for entry in _navigateTGs(tgPath, name, tierName): 65 | label = entry[2] 66 | outputList.append("%s" % (label)) 67 | 68 | outputTxt = "\n".join(outputList) 69 | outputFN = join(outputPath, name + ".txt") 70 | with io.open(outputFN, "w", encoding="utf-8") as fd: 71 | fd.write(outputTxt) 72 | 73 | 74 | def extractWords(tgPath, tierName, outputPath): 75 | utils.makeDir(outputPath) 76 | 77 | for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True): 78 | outputList = [] 79 | for entry in _navigateTGs(tgPath, name, tierName): 80 | label = entry[2] 81 | for word in label.split(): 82 | outputList.append("%s" % (word)) 83 | 84 | outputTxt = "\n".join(outputList) 85 | outputFN = join(outputPath, name + ".txt") 86 | with io.open(outputFN, "w", encoding="utf-8") as fd: 87 | fd.write(outputTxt) 88 | -------------------------------------------------------------------------------- /pyacoustics/utilities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/utilities/__init__.py -------------------------------------------------------------------------------- /pyacoustics/utilities/error_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jun 7, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | 9 | 10 | class ApplicationNotFound(Exception): 11 | def __init__(self, applicationName): 12 | super(ApplicationNotFound, self).__init__() 13 | self.applicationName = applicationName 14 | 15 | def __str__(self): 16 | return "Application (%s) does not exist" % self.applicationName 17 | 18 | 19 | def checkForApplication(application): 20 | if not os.path.exists(application): 21 | raise ApplicationNotFound(application) 22 | -------------------------------------------------------------------------------- /pyacoustics/utilities/filters.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 20, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import math 8 | 9 | 10 | def medianFilter(dist, window, useEdgePadding): 11 | 
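    """
    Smooths a distribution by replacing each value with its window median.

    A sketch of the behavior:
        >>> medianFilter([1, 9, 1, 1, 1], 3, useEdgePadding=True)
        [1, 1, 1, 1, 1]

    If useEdgePadding is False, values closer than window // 2 to either
    edge are passed through unfiltered; otherwise the edge values are
    repeated to fill out the window.
    """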
offset = int(math.floor(window / 2.0)) 12 | length = len(dist) 13 | 14 | returnList = [] 15 | for x in range(length): 16 | dataToFilter = [] 17 | # If using edge padding or if 0 <= context <= length 18 | if useEdgePadding or (((0 <= x - offset) and (x + offset < length))): 19 | preContext = [] 20 | currentContext = [ 21 | dist[x], 22 | ] 23 | postContext = [] 24 | 25 | lastKnownLargeIndex = 0 26 | for y in range(1, offset + 1): # 1-based 27 | if x + y >= length: 28 | if lastKnownLargeIndex == 0: 29 | largeIndexValue = x 30 | else: 31 | largeIndexValue = lastKnownLargeIndex 32 | else: 33 | largeIndexValue = x + y 34 | lastKnownLargeIndex = x + y 35 | 36 | postContext.append(dist[largeIndexValue]) 37 | 38 | if x - y < 0: 39 | smallIndexValue = 0 40 | else: 41 | smallIndexValue = x - y 42 | 43 | preContext.insert(0, dist[smallIndexValue]) 44 | 45 | dataToFilter = preContext + currentContext + postContext 46 | value = _median(dataToFilter) 47 | else: 48 | value = dist[x] 49 | returnList.append(value) 50 | 51 | return returnList 52 | 53 | 54 | def _median(valList): 55 | valList = valList[:] 56 | valList.sort() 57 | 58 | if len(valList) % 2 == 0: # Even 59 | i = int(len(valList) / 2.0) 60 | medianVal = (valList[i - 1] + valList[i]) / 2.0 61 | else: # Odd 62 | i = int(len(valList) / 2.0) 63 | medianVal = valList[i] 64 | 65 | return medianVal 66 | -------------------------------------------------------------------------------- /pyacoustics/utilities/matlab.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jul 28, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import subprocess 8 | 9 | from pyacoustics.utilities import error_utils 10 | 11 | 12 | def runMatlabFunction(command, matlabEXE, matlabCodePathList, printCmd=False): 13 | error_utils.checkForApplication(matlabEXE) 14 | 15 | pathCode = "".join( 16 | ["addpath('%s');" % matlabCodePath for matlabCodePath in matlabCodePathList] 17 | ) 18 | exitCode = "exit;" 19 | 20 | codeSequence = pathCode + command + exitCode 21 | 22 | if printCmd is True: 23 | print(matlabEXE + ' -nosplash -nodesktop -r "%s"' % codeSequence) 24 | myProcess = subprocess.Popen( 25 | [matlabEXE, "-nosplash", "-nodesktop", "-r", codeSequence] 26 | ) 27 | if myProcess.wait(): 28 | exit() # Something has gone wrong (an error message should be printed) 29 | -------------------------------------------------------------------------------- /pyacoustics/utilities/my_math.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Apr 3, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import math 8 | 9 | 10 | def rms(intensityValues): 11 | intensityValues = [val**2 for val in intensityValues] 12 | meanVal = sum(intensityValues) / len(intensityValues) 13 | return math.sqrt(meanVal) 14 | 15 | 16 | def linspace(start, stop, n): 17 | if n == 1: 18 | return [ 19 | stop, 20 | ] 21 | h = (stop - start) / float(n - 1) 22 | return [start + h * i for i in range(n)] 23 | 24 | 25 | def orderOfMagnitude(val): 26 | return int(math.floor(math.log10(val))) 27 | -------------------------------------------------------------------------------- /pyacoustics/utilities/normalize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 16, 2012 3 | 4 | @author: timmahrt 5 | """ 6 | 7 | import math 8 | 9 | 10 | def _zscoreNormalize(raw, mean, stdDev): 11 | # No problems related to integers or 64-bit floats (which don't trigger 12 | # a divide by zero 
exception) 13 | raw, mean, stdDev = float(raw), float(mean), float(stdDev) 14 | return (raw - mean) / stdDev 15 | 16 | 17 | def zscoreNormalizeValue(value, distribution): 18 | """ 19 | Appropriate to use when the context (the distribution) varies. 20 | """ 21 | mean = sum(distribution) / len(distribution) 22 | 23 | tmpList = [(tmpVal - mean) ** 2 for tmpVal in distribution] 24 | standardDeviation = math.sqrt(sum(tmpList) / len(tmpList)) 25 | 26 | return _zscoreNormalize(value, mean, standardDeviation) 27 | 28 | 29 | def syntagmaticNormalization(sampleIndexList, dataList, contextList): 30 | """ 31 | Normalizes using local context (before and after the occurrence) 32 | 33 | 'sampleIndexList' contains the list of indices for values in 'contextList' 34 | that should be normalized. 35 | 'contextList' provides the indices for all words that should be 36 | considered (including the present one) 37 | e.g. for +/- 2 words [-2, -1, 0, 1, 2] 38 | 'featureExtractionFunc' provides the function that extracts the 39 | relevant feature to be normalized from the words (could be 40 | a word or syllable level feature) 41 | """ 42 | 43 | dataList = [float(value) for value in dataList] 44 | 45 | def doSkipValue(value): 46 | return value == 0 or value == "None" 47 | 48 | # Get the files associated with this speaker 49 | # - be patient, running retrieveStressIndex() takes some time the 50 | # first time 51 | # fnList = fetchFNsForSpeaker(speakerID) 52 | 53 | negativeContextList = [contextI for contextI in contextList if contextI < 0] 54 | negativeContextList.sort(reverse=True) 55 | positiveContextList = [contextI for contextI in contextList if contextI > 0] 56 | positiveContextList.sort() 57 | 58 | # Create index 59 | outputList = [] 60 | for i in sampleIndexList: 61 | value = dataList[i] 62 | 63 | # A value of 0.0 generally is not meaningful 64 | # (TODO: is there anywhere where this is not the case?) 
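        # Each kept value is z-scored against its local context further
        # below: z = (value - mean(context)) / stddev(context), via
        # zscoreNormalizeValue().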
65 |         if i == -1 or doSkipValue(value):
66 |             outputList.append(0)
67 |             continue
68 | 
69 |         contextValueList = [
70 |             dataList[i],
71 |         ]
72 | 
73 |         for incr, tmpContextList in [
74 |             (-1, negativeContextList[:]),
75 |             (1, positiveContextList[:]),
76 |         ]:
77 |             prevContextValue = dataList[i]
78 |             for contextI in tmpContextList:
79 |                 try:
80 |                     assert i + contextI >= 0
81 |                     subValue = dataList[i + contextI]
82 | 
83 |                 # If we've gone outside the bounds of the file, just
84 |                 # repeat the last known good value
85 |                 except (IndexError, AssertionError):
86 |                     contextValueList.append(prevContextValue)
87 |                     continue
88 | 
89 |                 # Don't count words with meaningless values as part
90 |                 # of the context
91 |                 if doSkipValue(subValue):
92 |                     tmpContextList.append(tmpContextList[-1] + incr)
93 |                     continue
94 | 
95 |                 prevContextValue = subValue
96 |                 contextValueList.append(subValue)
97 | 
98 |         normalizedValue = zscoreNormalizeValue(value, contextValueList)
99 |         outputList.append(normalizedValue)
100 | 
101 |     return outputList
102 | 
--------------------------------------------------------------------------------
/pyacoustics/utilities/sequences.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Jun 5, 2013
3 | 
4 | @author: timmahrt
5 | """
6 | import math
7 | 
8 | from pyacoustics.utilities import my_math
9 | 
10 | 
11 | DO_SAMPLE_GATED = 1  # Each subsequence overlaps by (n-1)/2
12 | DO_SAMPLE_EXCLUSIVE = 2  # No index appears in two subsequences
13 | DO_SAMPLE_ALL = 3  # Each index acts as the control point once
14 | 
15 | 
16 | def compressList(targetList):
17 |     """
18 |     Compresses a list into runs of the form [value, startIndex, endIndex]
19 | 
20 |     e.g. targetList = [1, 1, 1, 1, 2, 2, 1, 1, 3]
21 |     >> [[1, 0, 4], [2, 4, 6], [1, 6, 8], [3, 8, 9]]
22 |     """
23 | 
24 |     currentValue = targetList[0]
25 |     startIndex = 0
26 |     i = 0
27 | 
28 |     outputList = []
29 |     while i < len(targetList):
30 |         if targetList[i] == currentValue:
31 |             i += 1
32 |             continue
33 | 
34 |         outputList.append([currentValue, startIndex, i])
35 | 
36 |         currentValue = targetList[i]
37 |         startIndex = i
38 |         i += 1
39 | 
40 |     if len(outputList) == 0 or outputList[-1][0] != currentValue:
41 |         outputList.append([currentValue, startIndex, i])
42 | 
43 | 
44 | 
45 |     return outputList
46 | 
47 | 
48 | def compressedListTransform(compressedList, timeStep, timeThreshold=None):
49 |     """
50 |     Isolates the unique values in compressedList and converts them to time
51 | 
52 |     timeThreshold can be set to ignore values that are not long enough, adding
53 |     their content to whatever came before (prevents fragmenting data too much).
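
    e.g. with the compressList() output [['a', 0, 4], ['b', 4, 6]] and a
    timeStep of 0.01 seconds per sample (and no threshold):
    >> {'a': [[0.0, 0.04, '0']], 'b': [[0.04, 0.06, '0']]}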
54 | """ 55 | 56 | returnDict = {} 57 | countDict = {} 58 | lastGoodLabel = None 59 | for label, start, end in compressedList: 60 | countDict.setdefault(label, 0) 61 | returnDict.setdefault(label, []) 62 | 63 | startTime = start * timeStep 64 | endTime = end * timeStep 65 | 66 | # Merge this entry with the previous one 67 | # if it is too short (noise tolerance) 68 | tmpDuration = (end - start) * timeStep 69 | if timeThreshold is not None and tmpDuration < timeThreshold: 70 | # If the very first entry is less than 0.3 seconds long 71 | if lastGoodLabel is not None and returnDict[lastGoodLabel] != []: 72 | returnDict[lastGoodLabel][-1][1] = endTime 73 | continue 74 | 75 | # If the previous label and this one are the same, merge entries 76 | if label == lastGoodLabel: 77 | returnDict[label][-1][1] = endTime 78 | 79 | # Otherwise, create a new entry 80 | else: 81 | returnDict[label].append([startTime, endTime, str(countDict[label])]) 82 | countDict[label] += 1 83 | lastGoodLabel = label 84 | 85 | return returnDict 86 | 87 | 88 | def sampleMiddle(dataList, i, chunkSize): 89 | """ 90 | The control point lies in the center (i - 1 ) / 2.0 91 | """ 92 | assert (chunkSize % 2) == 1 # i must be an odd number 93 | halfChunk = int(math.floor(chunkSize / 2.0)) 94 | 95 | subList = [] 96 | indexList = [] 97 | start = i - halfChunk if i - halfChunk >= 0 else 0 98 | end = i + halfChunk if i + halfChunk < len(dataList) else len(dataList) - 1 99 | 100 | # Handling underflow 101 | if i - halfChunk < 0: 102 | subList += [ 103 | dataList[0], 104 | ] * abs(i - halfChunk) 105 | indexList += [ 106 | 0, 107 | ] * abs(i - halfChunk) 108 | 109 | # The normal range 110 | mainBody = [dataList[j] for j in range(start, end + 1)] 111 | uniqueChunkLen = len(mainBody) 112 | subList.extend(mainBody) 113 | indexList.extend([j for j in range(start, end + 1)]) 114 | 115 | # Handling overflow 116 | if i + halfChunk >= len(dataList): 117 | subList += [ 118 | dataList[len(dataList) - 1], 119 | ] * ((1 + i + halfChunk) - len(dataList)) 120 | indexList.extend( 121 | [ 122 | len(dataList) - 1, 123 | ] 124 | * ((1 + i + halfChunk - len(dataList))) 125 | ) 126 | 127 | return subList, indexList, uniqueChunkLen 128 | 129 | 130 | def sampleLeft(dataList, i, chunkSize): 131 | """ 132 | The control point lies on the left edge (i = 0) 133 | """ 134 | subList = [] 135 | indexList = [] 136 | start = i 137 | end = i + chunkSize if i + chunkSize < len(dataList) else len(dataList) 138 | 139 | # The normal range 140 | mainBody = [dataList[j] for j in range(start, end)] 141 | uniqueChunkLen = len(mainBody) 142 | subList.extend(mainBody) 143 | indexList.extend([j for j in range(start, end)]) 144 | 145 | # Handling overflow 146 | if i + chunkSize >= len(dataList): 147 | subList += [ 148 | dataList[len(dataList) - 1], 149 | ] * ((1 + i + chunkSize) - len(dataList)) 150 | indexList += [ 151 | len(dataList) - 1, 152 | ] * ((1 + i + chunkSize) - len(dataList)) 153 | 154 | return subList, indexList, uniqueChunkLen 155 | 156 | 157 | def sampleRight(dataList, i, chunkSize): 158 | """ 159 | The control point lies on the right edge (i = -1) 160 | """ 161 | subList = [] 162 | indexList = [] 163 | start = 1 + i - chunkSize if 1 + i - chunkSize >= 0 else 0 164 | end = i + 1 if i < len(dataList) else len(dataList) 165 | 166 | # Handling underflow 167 | # print("blah", abs(i - chunkSize), start, end) 168 | if i - chunkSize < 0: 169 | subList += [ 170 | dataList[0], 171 | ] * (abs(i - chunkSize + 1)) 172 | indexList += [ 173 | 0, 174 | ] * (abs(i - chunkSize + 1)) 
195 | def subsequenceGenerator(dataList, chunkSize, sampleFunc, stepSizeFlag):
196 |     """
197 |     Can iteratively generate subsequences in a variety of fashions
198 | 
199 |     chunkSize - the size of each chunk
200 |     sampleFunc - e.g. sampleMiddle(), sampleLeft(), sampleRight(), determines
201 |                  the 'controlPoint'
202 |     stepSizeFlag - determines the distance between starting points (one of the DO_SAMPLE_* flags)
203 | 
204 |     Regardless of the parameters, all values will appear in one of the
205 |     subsequences, including the endpoints.  Each subsequence is the same
206 |     length--if necessary, values are repeated on the tail ends of the
207 |     list
208 |     """
209 | 
210 |     if stepSizeFlag == DO_SAMPLE_EXCLUSIVE:
211 |         stepSize = chunkSize
212 |     elif stepSizeFlag == DO_SAMPLE_GATED:
213 |         stepSize = int(math.floor(chunkSize / 2.0))
214 |     elif stepSizeFlag == DO_SAMPLE_ALL:
215 |         stepSize = 1
216 | 
217 |     controlPoint = 0
218 |     finalIndex = 0
219 |     doneIterating = False
220 |     while not doneIterating:
221 |         subSequence, subSequenceIndices, sampledLen = sampleFunc(
222 |             dataList, controlPoint, chunkSize
223 |         )
224 | 
225 |         finalIndex = subSequenceIndices[-1]
226 |         isEndpointLastValue = finalIndex >= (len(dataList) - 1)
227 |         isControlPointLastValue = controlPoint >= (len(dataList) - 1)
228 | 
229 |         # Regardless of what the control point was, end when the last index
230 |         # in the subset matches the length of the data list
231 |         if stepSizeFlag == DO_SAMPLE_EXCLUSIVE:
232 |             doneIterating = isEndpointLastValue
233 | 
234 |         # When the control point index reaches the end of the data list
235 |         # (i.e., all values have been represented in some list, end)
236 |         else:
237 |             doneIterating = isControlPointLastValue
238 | 
239 |         controlPoint += stepSize
240 | 
241 |         if stepSizeFlag == DO_SAMPLE_GATED:
242 |             if sampleFunc == sampleMiddle:
243 |                 region = subSequenceIndices[int((chunkSize - 1) / 2.0) : -1]
244 |             elif sampleFunc == sampleLeft:
245 |                 region = subSequenceIndices[: int((chunkSize - 1) / 2.0)]
246 |             elif sampleFunc == sampleRight:
247 |                 region = subSequenceIndices[int((chunkSize - 1) / 2.0) + 1 :]
248 | 
249 |             # The number of unique indices in the gated region (padding repeats collapse)
250 |             sampledLen = len(set(region))
251 | 
252 |             if doneIterating and sampleFunc != sampleRight:
253 |                 sampledLen = 0
254 | 
255 |         yield subSequence, subSequenceIndices, sampledLen
256 | 
257 | 
258 | def interp(start, stop, n):  # yields n evenly spaced values from start to stop, inclusive (n must be >= 2)
259 |     for i in range(n):
260 |         yield start + i * (stop - start) / float(n - 1)
261 | 
262 | 
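# An illustrative sketch of the generator above (not part of the source
# file; it assumes the padding fix to sampleLeft so every chunk has exactly
# chunkSize values). DO_SAMPLE_EXCLUSIVE advances the control point by
# chunkSize each step, so every value appears in exactly one subsequence,
# with the final chunk padded when the list runs out.
#
#     data = [0, 1, 2, 3, 4]
#     for chunk, indices, sampledLen in subsequenceGenerator(
#         data, 2, sampleLeft, DO_SAMPLE_EXCLUSIVE
#     ):
#         print(chunk)  # [0, 1] then [2, 3] then [4, 4]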
263 | # Adapted this from online - for getting a set of evenly spaced intervals
264 | # from a list
265 | # http://stackoverflow.com/questions/10084436/generating-evenly-distributed-
266 | # multiples-samples-within-a-range
267 | def getEvenlySpacedSteps(start, end, n):
268 |     assert end + 1 - start >= n
269 | 
270 |     # The usual case
271 |     if n != 1:
272 |         step = (end - start) / float(n - 1)
273 |         retList = [int(round(start + x * step)) for x in range(n)]
274 | 
275 |     # If someone only wants 1 sample, just take the middle sample
276 |     else:
277 |         midpoint = start + (end - start) / 2.0
278 |         retList = [
279 |             int(round(midpoint)),
280 |         ]
281 | 
282 |     return retList
283 | 
284 | 
285 | def binDistribution(distList, numBins, minV=None, maxV=None):
286 |     """
287 |     Places all data into the closest of n evenly spaced bins
288 |     """
289 | 
290 |     if minV is None:
291 |         minV = min(distList)
292 | 
293 |     if maxV is None:
294 |         maxV = max(distList)
295 | 
296 |     binValueArray = my_math.linspace(minV, maxV, numBins)
297 | 
298 |     binnedValueList = []
299 |     for value in distList:
300 |         diffList = list(abs(binValueArray - value))
301 |         smallestDiff = min(diffList)
302 |         binIndex = diffList.index(smallestDiff)
303 | 
304 |         binnedValueList.append(binValueArray[binIndex])
305 | 
306 |     return binnedValueList
307 | 
308 | 
309 | def findLongestSublist(listOfLists):
310 |     longestList = []
311 |     longestIndex = None
312 |     for i, lst in enumerate(listOfLists):
313 |         if len(lst) > len(longestList):
314 |             longestIndex, longestList = i, lst
315 | 
316 |     return longestIndex, longestList
317 | 
318 | 
319 | def invertIntervalList(entryList, minValue=0, maxValue=None):
320 |     """
321 |     Given a list of ordinal events, inverts the start and end positions
322 | 
323 |     e.g. input [(5, 6), (10, 13), (14, 16)]
324 |          output [(0, 5), (6, 10), (13, 14)]
325 |     """
326 |     if entryList == []:
327 |         return []
328 | 
329 |     newEntryList = []
330 |     i = 0
331 | 
332 |     # Add possible initial interval
333 |     if minValue is not None:
334 |         if entryList[0][0] > minValue:
335 |             newEntryList.append((minValue, entryList[0][0]))
336 | 
337 |     while i + 1 < len(entryList):
338 |         newEntryList.append((entryList[i][1], entryList[i + 1][0]))
339 |         i += 1
340 | 
341 |     # Add possible trailing interval
342 |     if maxValue is not None:
343 |         if entryList[i][1] < maxValue:
344 |             newEntryList.append((entryList[i][1], maxValue))
345 | 
346 |     return newEntryList
347 | 
--------------------------------------------------------------------------------
/pyacoustics/utilities/statistics.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Apr 2, 2015
3 | 
4 | @author: tmahrt
5 | """
6 | 
7 | import math
8 | 
9 | 
10 | def medianFilter(dist, window, useEdgePadding):
11 |     offset = int(math.floor(window / 2.0))
12 |     length = len(dist)
13 | 
14 |     returnList = []
15 |     for x in range(length):
16 |         dataToFilter = []
17 |         # If using edge padding or if the full window fits inside the list
18 |         if useEdgePadding or (0 <= x - offset and x + offset < length):
19 |             preContext = []
20 |             currentContext = [
21 |                 dist[x],
22 |             ]
23 |             postContext = []
24 | 
25 |             lastKnownLargeIndex = 0
26 |             for y in range(1, offset + 1):  # offsets y are 1-based
27 |                 if x + y >= length:
28 |                     if lastKnownLargeIndex == 0:
29 |                         largeIndexValue = x
30 |                     else:
31 |                         largeIndexValue = lastKnownLargeIndex
32 |                 else:
33 |                     largeIndexValue = x + y
34 |                     lastKnownLargeIndex = x + y
35 | 
36 |                 postContext.append(dist[largeIndexValue])
37 | 
38 |                 if x - y < 0:
39 |                     smallIndexValue = 0
40 |                 else:
41 |                     smallIndexValue = x - y
42 | 
43 |                 preContext.insert(0, dist[smallIndexValue])
44 | 
45 |             dataToFilter = preContext + currentContext + postContext
46 |             value = getMedian(dataToFilter)
47 |         else:
48 |             value = dist[x]
49 | 
50 |         returnList.append(value)
51 | 
52 |     return returnList
53 | 
54 | 
55 | def getMedian(dist):
56 |     assert len(dist) > 0
57 | 
58 |     dist = sorted(dist)
59 |     length = len(dist)
60 | 
61 |     halfPoint = int(length / 2.0)
62 | 
63 |     if length % 2 == 0:
64 |         median = (dist[halfPoint - 1] + dist[halfPoint]) / 2.0
65 |     else:
66 |         median = dist[halfPoint]
67 | 
68 |     return median
69 | 
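# Illustrative usage sketch (an editorial addition, not part of the
# original file), using the same values as tests/test_statistics.py: a
# window-3 median filter smooths isolated spikes, and edge padding keeps
# the output the same length as the input.
if __name__ == "__main__":
    data = [5, 1, 10, 13, 3, 17, 9, 17]
    print(medianFilter(data, 3, useEdgePadding=True))  # [5, 5, 10, 10, 13, 9, 17, 17]
    print(getMedian(data))  # 9.5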
--------------------------------------------------------------------------------
/pyacoustics/utilities/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Oct 11, 2012
3 | 
4 | @author: timmahrt
5 | """
6 | 
7 | import os
8 | from os.path import join
9 | 
10 | import functools
11 | import itertools
12 | import shutil
13 | import io
14 | import inspect
15 | 
16 | 
17 | pyAcousticsPath = os.path.split(inspect.getfile(inspect.currentframe()))[0]
18 | # Get out of the 'utilities' folder
19 | pyAcousticsPath = os.path.split(pyAcousticsPath)[0]
20 | scriptsPath = join(pyAcousticsPath, "praatScripts")
21 | 
22 | 
23 | def _getMatchFunc(pattern):
24 |     """
25 |     An unsophisticated pattern matching function
26 |     """
27 | 
28 |     # '#' marks a word boundary; if there were more than one, we would need
29 |     # to do something special to make sure we're not misrepresenting them
30 |     assert pattern.count("#") < 2
31 | 
32 |     def startsWith(subStr, fullStr):
33 |         return fullStr[: len(subStr)] == subStr
34 | 
35 |     def endsWith(subStr, fullStr):
36 |         return fullStr[-1 * len(subStr) :] == subStr
37 | 
38 |     def inStr(subStr, fullStr):
39 |         return subStr in fullStr
40 | 
41 |     # Selection of the correct function
42 |     if pattern[0] == "#":
43 |         pattern = pattern[1:]
44 |         cmpFunc = startsWith
45 | 
46 |     elif pattern[-1] == "#":
47 |         pattern = pattern[:-1]
48 |         cmpFunc = endsWith
49 | 
50 |     else:
51 |         cmpFunc = inStr
52 | 
53 |     return functools.partial(cmpFunc, pattern)
54 | 
55 | 
56 | def findFiles(
57 |     path,
58 |     filterPaths=False,
59 |     filterExt=None,
60 |     filterPattern=None,
61 |     skipIfNameInList=None,
62 |     stripExt=False,
63 | ):
64 |     fnList = os.listdir(path)
65 | 
66 |     if filterPaths is True:
67 |         fnList = [
68 |             folderName
69 |             for folderName in fnList
70 |             if os.path.isdir(os.path.join(path, folderName))
71 |         ]
72 | 
73 |     if filterExt is not None:
74 |         splitFNList = [
75 |             [
76 |                 fn,
77 |             ]
78 |             + list(os.path.splitext(fn))
79 |             for fn in fnList
80 |         ]
81 |         fnList = [fn for fn, name, ext in splitFNList if ext == filterExt]
82 | 
83 |     if filterPattern is not None:
84 |         splitFNList = [
85 |             [
86 |                 fn,
87 |             ]
88 |             + list(os.path.splitext(fn))
89 |             for fn in fnList
90 |         ]
91 |         matchFunc = _getMatchFunc(filterPattern)
92 |         fnList = [fn for fn, name, ext in splitFNList if matchFunc(name)]
93 | 
94 |     if skipIfNameInList is not None:
95 |         targetNameList = [os.path.splitext(fn)[0] for fn in skipIfNameInList]
96 |         fnList = [fn for fn in fnList if os.path.splitext(fn)[0] not in targetNameList]
97 | 
98 |     if stripExt is True:
99 |         fnList = [os.path.splitext(fn)[0] for fn in fnList]
100 | 
101 |     fnList.sort()
102 |     return fnList
103 | 
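# An illustrative sketch of findFiles (not part of the source file; `path`
# is assumed to name a directory of audio files): the '#' in filterPattern
# anchors the match to the start or end of the filename (extension
# excluded), while a bare substring matches anywhere.
#
#     findFiles(path, filterExt=".wav")                      # only .wav files
#     findFiles(path, filterPattern="#intro")                # names starting with 'intro'
#     findFiles(path, filterPattern="data#", stripExt=True)  # names ending in 'data', extensions stripped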
114 | """ 115 | 116 | # Load CSV file 117 | with io.open(join(path, fn), "r", encoding=encoding) as fd: 118 | featureList = fd.read().splitlines() 119 | featureList = [row.split(",") for row in featureList] 120 | 121 | if valueIndex is not None: 122 | featureList = [row[valueIndex] for row in featureList] 123 | 124 | return featureList 125 | 126 | 127 | def changeFileType(path, fromExt, toExt): 128 | if fromExt[0] != ".": 129 | fromExt = "." + fromExt 130 | if toExt[0] != ".": 131 | toExt = "." + toExt 132 | 133 | for fn in os.listdir(path): 134 | name, ext = os.path.splitext(fn) 135 | if ext == fromExt: 136 | shutil.move(join(path, fn), join(path, name + toExt)) 137 | 138 | 139 | def makeDir(path): 140 | if not os.path.exists(path): 141 | os.mkdir(path) 142 | 143 | 144 | def extractLines(path, matchStr, outputDir="output"): 145 | outputPath = join(path, outputDir) 146 | makeDir(outputPath) 147 | 148 | for fn in findFiles(path, filterExt=".csv"): 149 | with io.open(join(path, fn), "r", encoding="utf-8") as fd: 150 | data = fd.read() 151 | dataList = data.split("\n") 152 | 153 | dataList = [line for line in dataList if matchStr in line] 154 | 155 | with io.open(join(outputPath, fn), "w", encoding="utf-8") as fd: 156 | fd.write("\n".join(dataList)) 157 | 158 | 159 | def cat(fn1, fn2, outputFN): 160 | with io.open(fn1, "r", encoding="utf-8") as fd: 161 | txt1 = fd.read() 162 | with io.open(fn2, "r", encoding="utf-8") as fd: 163 | txt2 = fd.read() 164 | 165 | with io.open(outputFN, "w", encoding="utf-8") as fd: 166 | fd.write(txt1 + txt2) 167 | 168 | 169 | def catAll(path, ext, ensureNewline=False): 170 | outputPath = join(path, "cat_output") 171 | makeDir(outputPath) 172 | 173 | outputList = [] 174 | for fn in findFiles(path, filterExt=ext): 175 | with io.open(join(path, fn), "r", encoding="utf-8") as fd: 176 | data = fd.read() 177 | 178 | if ensureNewline and data[-1] != "\n": 179 | data += "\n" 180 | 181 | outputList.append(data) 182 | 183 | outputTxt = "".join(outputList) 184 | outputFN = join(outputPath, "catFiles" + ext) 185 | with io.open(outputFN, "w", encoding="utf-8") as fd: 186 | fd.write(outputTxt) 187 | 188 | 189 | def whatever(path): 190 | outputList = [] 191 | for fn in findFiles(path, filterExt=".txt"): 192 | outputList.extend( 193 | [ 194 | fn, 195 | ] 196 | * 30 197 | ) 198 | 199 | for fn in outputList: 200 | print(fn) 201 | 202 | 203 | def divide(numerator, denominator, zeroValue): 204 | if denominator == 0: 205 | retValue = zeroValue 206 | else: 207 | retValue = numerator / float(denominator) 208 | 209 | return retValue 210 | 211 | 212 | def safeZip(listOfLists, enforceLength): 213 | if enforceLength is True: 214 | length = len(listOfLists[0]) 215 | assert all([length == len(subList) for subList in listOfLists]) 216 | 217 | return itertools.izip_longest(*listOfLists) 218 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | Created on Oct 15, 2014 5 | 6 | @author: tmahrt 7 | """ 8 | from setuptools import setup 9 | import io 10 | 11 | setup( 12 | name="pyacoustics", 13 | python_requires=">3.6.0", 14 | version="2.0.0", 15 | author="Tim Mahrt", 16 | author_email="timmahrt@gmail.com", 17 | url="https://github.com/timmahrt/pyAcoustics", 18 | package_dir={"pyacoustics": "pyacoustics"}, 19 | packages=[ 20 | "pyacoustics", 21 | "pyacoustics.intensity_and_pitch", 22 | "pyacoustics.signals", 23 | 
"pyacoustics.speech_detection", 24 | "pyacoustics.speech_rate", 25 | "pyacoustics.text", 26 | "pyacoustics.textgrids", 27 | "pyacoustics.utilities", 28 | ], 29 | package_data={ 30 | "pyacoustics": [ 31 | "matlabScripts/detect_syllable_nuclei.m", 32 | ] 33 | }, 34 | license="LICENSE", 35 | install_requires=[ 36 | "praatio ~= 6.0", 37 | "typing_extensions", 38 | ], 39 | description="A collection of python scripts for extracting and analyzing acoustics from audio files.", 40 | long_description=io.open("README.md", "r", encoding="utf-8").read(), 41 | long_description_content_type="text/markdown", 42 | ) 43 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/tests/__init__.py -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/tests/integration/__init__.py -------------------------------------------------------------------------------- /tests/integration/test_integration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Nov 21, 2021 5 | 6 | @author: tmahrt 7 | 8 | Runs integration tests 9 | 10 | The examples were all written as scripts. They weren't meant to be 11 | imported or run from other code. So here, the integration test is just 12 | importing the scripts, which causes them to execute. If the code completes 13 | with no errors, then the code is at least able to complete. 14 | 15 | Testing whether or not the code actually did what it is supposed to is 16 | another issue and will require some refactoring. 
17 | """ 18 | 19 | import unittest 20 | import os 21 | import sys 22 | from pathlib import Path 23 | 24 | _root = os.path.join(Path(__file__).parents[2], "examples") 25 | sys.path.append(_root) 26 | 27 | 28 | class TestIntegration(unittest.TestCase): 29 | """Integration tests""" 30 | 31 | def test_estimate_speech_rate(self): 32 | """Running 'add_tiers.py'""" 33 | import estimate_speech_rate 34 | 35 | def test_frequency(self): 36 | """Running 'anonymize_recording'""" 37 | import frequency 38 | 39 | def test_split_audio_on_silence(self): 40 | """Running 'calculate_duration.py'""" 41 | import split_audio_on_silence 42 | 43 | def test_split_audio_on_tone(self): 44 | """Running 'correct_misaligned_tiers.py'""" 45 | import split_audio_on_tone 46 | 47 | def setUp(self): 48 | unittest.TestCase.setUp(self) 49 | 50 | root = os.path.join(_root, "files") 51 | self.oldRoot = os.getcwd() 52 | os.chdir(_root) 53 | self.startingList = os.listdir(root) 54 | self.startingDir = os.getcwd() 55 | 56 | def tearDown(self): 57 | """Remove any files generated during the test""" 58 | # unittest.TestCase.tearDown(self) 59 | 60 | root = os.path.join(".", "files") 61 | endingList = os.listdir(root) 62 | endingDir = os.getcwd() 63 | rmList = [fn for fn in endingList if fn not in self.startingList] 64 | 65 | if self.oldRoot == root: 66 | for fn in rmList: 67 | fnFullPath = os.path.join(root, fn) 68 | if os.path.isdir(fnFullPath): 69 | os.rmdir(fnFullPath) 70 | else: 71 | os.remove(fnFullPath) 72 | 73 | os.chdir(self.oldRoot) 74 | 75 | 76 | if __name__ == "__main__": 77 | unittest.main() 78 | -------------------------------------------------------------------------------- /tests/test_sequences.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jul 3, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import unittest 8 | 9 | from pyacoustics.utilities import sequences 10 | 11 | 12 | class TestSequences(unittest.TestCase): 13 | 14 | LIST_A = [(5, 6), (10, 13), (14, 16)] 15 | LIST_B = [(0, 1), (5, 6), (10, 13), (14, 16)] 16 | 17 | def test_startsAtZero(self): 18 | invertedList = sequences.invertIntervalList(self.LIST_B) 19 | correctAnswer = [(1, 5), (6, 10), (13, 14)] 20 | self.assertEqual(invertedList, correctAnswer) 21 | 22 | def test_startsAtNonZero(self): 23 | invertedList = sequences.invertIntervalList(self.LIST_A) 24 | correctAnswer = [(0, 5), (6, 10), (13, 14)] 25 | self.assertEqual(invertedList, correctAnswer) 26 | 27 | def test_maxValue(self): 28 | invertedList = sequences.invertIntervalList(self.LIST_B, maxValue=20) 29 | correctAnswer = [(1, 5), (6, 10), (13, 14), (16, 20)] 30 | self.assertEqual(invertedList, correctAnswer) 31 | 32 | def test_minValue(self): 33 | invertedList = sequences.invertIntervalList(self.LIST_A, minValue=3) 34 | correctAnswer = [(3, 5), (6, 10), (13, 14)] 35 | self.assertEqual(invertedList, correctAnswer) 36 | 37 | def test_twiceInverted(self): 38 | invertedList = sequences.invertIntervalList(self.LIST_A, 0, 20) 39 | twiceInvList = sequences.invertIntervalList(invertedList, 0, 20) 40 | self.assertEqual(self.LIST_A, twiceInvList) 41 | -------------------------------------------------------------------------------- /tests/test_statistics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Apr 2, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import unittest 8 | 9 | from pyacoustics.utilities import statistics 10 | 11 | 12 | class TestStatistics(unittest.TestCase): 13 | 14 | MY_LIST = [5, 
14 |     MY_LIST = [5, 1, 10, 13, 3, 17, 9, 17]
15 | 
16 |     def test_evenLengthedListCorrect(self):
17 |         median = statistics.getMedian(self.MY_LIST)
18 |         self.assertEqual(median, 9.5)
19 | 
20 |     def test_oddLengthedListCorrect(self):
21 |         median = statistics.getMedian(self.MY_LIST[:-1])
22 |         self.assertEqual(median, 9)
23 | 
24 |     def test_filterOddLengthedListCorrect(self):
25 |         medianList = statistics.medianFilter(self.MY_LIST, 3, useEdgePadding=True)
26 |         correctList = [5, 5, 10, 10, 13, 9, 17, 17]
27 |         self.assertEqual(medianList, correctList)
28 | 
--------------------------------------------------------------------------------