├── .gitignore ├── CHANGELOG.md ├── DEVELOP.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── examples ├── __init__.py ├── estimate_speech_rate.py ├── files │ ├── introduction.TextGrid │ ├── introduction.wav │ ├── tone_split_data.TextGrid │ └── tone_split_data.wav ├── frequency.py ├── split_audio_on_silence.py └── split_audio_on_tone.py ├── matlabScripts ├── detect_syllable_nuclei.m └── nucleus_detection_matlab │ ├── column2rowvec.m │ ├── fu_filter.m │ ├── fu_i_window.m │ ├── fu_locmax.m │ ├── fu_optstruct_init.m │ ├── fu_pause_detector.m │ ├── fu_r2c.m │ ├── fu_rmse.m │ ├── fu_smooth.m │ ├── fu_smooth_binvec.m │ ├── fu_smooth_binvec_sub.m │ ├── fu_sylbnd.m │ ├── fu_sylncl.m │ ├── fu_sylncl_sub.m │ ├── fu_transp.m │ ├── fu_trim_vec.m │ ├── fu_typecount.m │ ├── fu_voicing.m │ ├── fu_voicing_sub.m │ ├── fu_window_bnd.m │ ├── fu_window_vec.m │ ├── fu_zero_crossing_rate.m │ └── row2columnvec.m ├── praatScripts ├── get_pitch_and_intensity.praat └── psolaPitch.praat ├── pyacoustics ├── __init__.py ├── aggregate_features.py ├── intensity_and_pitch │ ├── __init__.py │ └── get_f0.py ├── morph │ ├── __init__.py │ └── intensity_morph.py ├── signals │ ├── __init__.py │ ├── audio_scripts.py │ └── data_fitting.py ├── speech_detection │ ├── __init__.py │ ├── common.py │ ├── naive_vad.py │ ├── naive_vad_efficient.py │ ├── segment_stereo_speech.py │ ├── segment_stereo_speech_efficient.py │ ├── split_on_tone.py │ └── textgrids.py ├── speech_filters │ ├── __init__.py │ └── speech_shaped_noise.py ├── speech_rate │ ├── __init__.py │ ├── dictionary_estimate.py │ └── uwe_sr.py ├── text │ ├── __init__.py │ ├── frequency.py │ └── transcript.py ├── textgrids │ ├── __init__.py │ ├── syllabify_textgrids.py │ └── textgrids.py └── utilities │ ├── __init__.py │ ├── error_utils.py │ ├── filters.py │ ├── matlab.py │ ├── my_math.py │ ├── normalize.py │ ├── sequences.py │ ├── statistics.py │ └── utils.py ├── resources ├── buckeye_counts.txt ├── buckeye_frequency_counts.csv ├── fischer_counts.txt ├── spoken_corpora_frequeny_counts.csv └── switchboard_counts.txt ├── setup.py └── tests ├── __init__.py ├── integration ├── __init__.py └── test_integration.py ├── test_sequences.py └── test_statistics.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | eggs/ 15 | lib/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # PyInstaller 25 | # Usually these files are written by a python script from a template 26 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 27 | *.manifest 28 | *.spec 29 | 30 | *.DS_Store 31 | 32 | *.project 33 | *.pydevproject 34 | 35 | examples/files/*/ 36 | examples/graveyard/* 37 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | # PyAcoustics Changelog 3 | 4 | *PyAcoustics uses semantic versioning (Major.Minor.Patch)* 5 | 6 | Ver 2.0 (July 15, 2023) 7 | - drop support for Python 2.7 8 | 9 | Ver 1.0 (June 7, 2015) 10 | - first public release. 11 | 12 | 13 | ## Features as they are added 14 | 15 | I was not using semantic versioning at the time I added these features and did not bump the version number. 
16 | 17 | Mask speech with speech shaped noise 18 | (March 21, 2016) 19 | 20 | Find syllable nuclei/estimate speech rate using Uwe Reichel's matlab code 21 | (July 29, 2015) 22 | 23 | Find the valley bottom between peaks (July 7th, 2015) 24 | -------------------------------------------------------------------------------- /DEVELOP.md: -------------------------------------------------------------------------------- 1 | 2 | These are development notes for myself 3 | 4 | ## Documentation 5 | 6 | Documentation is generated with the following command: 7 | `pdoc pyacoustics -d google -o docs` 8 | 9 | A live version can be seen with 10 | `pdoc pyacoustics -d google` 11 | 12 | pdoc will read from pyacoustics, as installed on the computer, so you may need to run `pip install .` if you want to generate documentation from a locally edited version of pyacoustics. 13 | 14 | ## Tests 15 | 16 | Tests are run with 17 | 18 | `pytest --cov=pyacoustics tests/` 19 | 20 | ## Release 21 | 22 | Releases are built and deployed with: 23 | 24 | `python setup.py bdist_wheel sdist` 25 | 26 | `twine upload dist/*` 27 | 28 | Don't forget to tag the release. 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This code contains portions with different licenses. Unless otherwise stated 2 | in this document, assume that the code is covered by the MIT license as stated below. 3 | 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2015, 2016, 2017, 2018, 2019, 2020 Tim Mahrt 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all 16 | copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE. 25 | 26 | -------------------------- 27 | 28 | The matlab scripts contained in the folder 'nucleus_detection_matlab' were 29 | written by Uwe Reichel. They are released here under the MIT license with 30 | Uwe Reichel's permission. 31 | 32 | -------------------------- 33 | 34 | The file speech_shaped_noise.py contains code redistributed from the project 35 | pambox. 36 | https://github.com/achabotl/pambox 37 | 38 | The modified portion of the pambox code included in speech_shaped_noise.py is 39 | redistributed with the following license: 40 | 41 | Copyright (c) 2014, Alexandre Chabot-Leclerc 42 | All rights reserved. 
43 | 44 | Redistribution and use in source and binary forms, with or without 45 | modification, are permitted provided that the following conditions are met: 46 | 47 | * Redistributions of source code must retain the above copyright notice, this 48 | list of conditions and the following disclaimer. 49 | 50 | * Redistributions in binary form must reproduce the above copyright notice, 51 | this list of conditions and the following disclaimer in the documentation 52 | and/or other materials provided with the distribution. 53 | 54 | * Neither the name of the Technical University of Denmark nor the names of its 55 | contributors may be used to endorse or promote products derived from 56 | this software without specific prior written permission. 57 | 58 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 59 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 61 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 62 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 64 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 65 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 66 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 67 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 68 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include praatScripts/*.praat -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # pyAcoustics 3 | 4 | [![](https://img.shields.io/badge/license-MIT-blue.svg?)](http://opensource.org/licenses/MIT) 5 | 6 | A collection of Python scripts for extracting and analyzing acoustics from audio files. 7 | 8 | # Table of contents 9 | 1. [Common use cases](#common-use-cases) 10 | 2. [Version history](#version-history) 11 | 3. [Requirements](#requirements) 12 | 4. [Installation](#installation) 13 | 5. [Example usage](#example-usage) 14 | 6. [Citing pyAcoustics](#citing-pyacoustics) 15 | 7. [Acknowledgements](#acknowledgements) 16 | 17 | ## Common Use Cases 18 | 19 | What can you do with this library? 20 | 21 | - Extract pitch and intensity: 22 | 23 | pyacoustics.intensity_and_pitch.praat_pi.getPraatPitchAndIntensity() 24 | 25 | - Extract segments of a wav file: 26 | 27 | pyacoustics.signals.audio_scripts.getSubwav() 28 | 29 | - Perform simple manipulations on wav files: 30 | 31 | pyacoustics.signals.resampleAudio() 32 | 33 | pyacoustics.signals.splitStereoAudio() 34 | 35 | - Split audio files on segments of silence or on pure tones: 36 | 37 | pyacoustics.speech_detection.split_on_tone.splitFileOnTone() 38 | 39 | - Programmatically manipulate the pitch or duration of a file: 40 | 41 | pyacoustics.morph.morph_utils.praat_pitch() 42 | 43 | - Mask speech with speech-shaped noise: 44 | 45 | pyacoustics.speech_filters.speech_shaped_noise.batchMaskSpeakerData() 46 | 47 | - And more! 48 | 49 | 50 | ## Version history 51 | 52 | *PyAcoustics uses semantic versioning (Major.Minor.Patch)* 53 | 54 | Please view [CHANGELOG.md]() for version history.
55 | 56 | 57 | ## Requirements 58 | 59 | Many of the individual features require different packages. If you aren't using those 60 | features, you don't need to install the corresponding dependencies. 61 | 62 | pyacoustics.intensity_and_pitch.praat_pi requires 63 | [praat]() 64 | 65 | pyacoustics.intensity_and_pitch.get_f0 requires the ESPS getF0 function as implemented 66 | by [Snack]() although I recall having difficulty 67 | installing it. 68 | 69 | pyacoustics.speech_rate.dictionary_estimate requires my library 70 | [pysle]() 71 | 72 | pyacoustics.signals.data_fitting requires 73 | [SciPy](), 74 | [NumPy](), and 75 | [scikit-learn]() 76 | 77 | My praatIO library is used extensively and can be downloaded 78 | [here]() 79 | 80 | 81 | ## Installation 82 | 83 | PyAcoustics is on PyPI and can be installed or upgraded from the command-line shell with pip like so: 84 | 85 | python -m pip install pyacoustics --upgrade 86 | 87 | Otherwise, to manually install, after downloading the source from GitHub, from a command-line shell, navigate to the directory containing setup.py and type: 88 | 89 | python setup.py install 90 | 91 | If Python is not in your path, you'll need to enter the full path e.g.: 92 | 93 | C:\Python36\python.exe setup.py install 94 | 95 | 96 | ## Example usage 97 | 98 | See the examples folder for a few real-world examples that use this library. 99 | 100 | - examples/split_audio_on_silence.py 101 | 102 | Detects the presence of speech in a recording based on acoustic 103 | intensity. Everything louder than some threshold specified by 104 | the user is considered speech. 105 | 106 | - examples/split_audio_on_tone.py 107 | 108 | Detects the presence of pure tones in a recording. One can use 109 | this to automatically segment stimuli. Beeps can be played while 110 | the speech is being recorded and then later this tool can 111 | automatically segment the speech, based on the presence of those 112 | tones. 113 | 114 | Also detects speech using a pitch analysis. Most syllables 115 | contain some voicing, so a stream of modulating pitch values 116 | suggests that someone is speaking. This aspect is not extensively 117 | tested but it works well for the example files. 118 | 119 | - examples/estimate_speech_rate.py 120 | 121 | Calculates the speech rate through a matlab script written by 122 | [Uwe Reichel]() 123 | that estimates the location of syllable nuclei. 124 | 125 | ## Citing PyAcoustics 126 | 127 | PyAcoustics is general-purpose code and doesn't need to be cited, 128 | but if you would like to, it can be cited like so: 129 | 130 | Tim Mahrt. PyAcoustics. https://github.com/timmahrt/pyAcoustics, 2016. 131 | 132 | 133 | ## Acknowledgements 134 | 135 | PyAcoustics is an ongoing collection of code with contributions from a 136 | number of projects worked on over several years. Development of various 137 | aspects of PyAcoustics was possible thanks to 138 | NSF grant **IIS 07-03624** 139 | to Jennifer Cole and Mark Hasegawa-Johnson, 140 | NSF grant BCS **12-51343** 141 | to Jennifer Cole, José Hualde, and Caroline Smith, and 142 | NSF grant 143 | **IBSS SMA 14-16791** to Jennifer Cole, Nancy McElwain, and Daniel Berry.
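As a quick, concrete taste of the API, here is a minimal, untested sketch (the folder paths are placeholders) that trims the first second from every wav file in a folder; `findFiles`, `makeDir`, `getSoundFileDuration`, and `extractSubwav` are used just as they are in the bundled examples:

    from os.path import join

    from pyacoustics.signals import audio_scripts
    from pyacoustics.utilities import utils

    inputPath = "/path/to/wavs"  # placeholder input folder
    outputPath = "/path/to/output"  # placeholder output folder
    utils.makeDir(outputPath)

    for name in utils.findFiles(inputPath, filterExt=".wav", stripExt=True):
        wavFN = join(inputPath, name + ".wav")
        duration = audio_scripts.getSoundFileDuration(wavFN)
        # Keep everything from t=1.0s to the end of the file
        audio_scripts.extractSubwav(
            wavFN,
            join(outputPath, name + "_trimmed.wav"),
            1.0,
            duration,
            singleChannelFlag=True,
        )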
144 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/examples/__init__.py -------------------------------------------------------------------------------- /examples/estimate_speech_rate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jul 27, 2015 3 | 4 | @author: tmahrt 5 | 6 | Two examples of how to use uwe_sr with two different types of data and 7 | two different tasks. 8 | 9 | First, it is possible to run this on either whole files or on segments 10 | of a file (here the segment times are extracted from a textgrid but you could 11 | use other input sources). 12 | 13 | Second, in one task, the syllable nuclei are serialized in a textgrid. In the 14 | other task, the speech rate is calculated. 15 | """ 16 | 17 | from os.path import join 18 | 19 | from praatio import textgrid 20 | from praatio import praatio_scripts 21 | 22 | from pyacoustics.signals import audio_scripts 23 | from pyacoustics.speech_rate import uwe_sr 24 | from pyacoustics.utilities import utils 25 | from pyacoustics.utilities import my_math 26 | 27 | 28 | def _runSpeechRateEstimate( 29 | wavPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd=True 30 | ): 31 | uwe_sr.findSyllableNuclei( 32 | wavPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd 33 | ) 34 | 35 | 36 | def _runSpeechRateEstimateOnIntervals( 37 | wavPath, 38 | tgPath, 39 | tierName, 40 | wavTmpPath, 41 | syllableNucleiPath, 42 | matlabEXE, 43 | matlabScriptsPath, 44 | printCmd=True, 45 | outputTGFlag=False, 46 | ): 47 | utils.makeDir(wavTmpPath) 48 | # Split audio files into subsections based on textgrid intervals 49 | for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True): 50 | praatio_scripts.splitAudioOnTier( 51 | join(wavPath, name + ".wav"), 52 | join(tgPath, name + ".TextGrid"), 53 | tierName, 54 | wavTmpPath, 55 | outputTGFlag, 56 | ) 57 | 58 | uwe_sr.findSyllableNuclei( 59 | wavTmpPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd 60 | ) 61 | 62 | 63 | def _addSyllableNucleiToTextgrids( 64 | wavPath, tgPath, tierName, syllableNucleiPath, outputPath 65 | ): 66 | # Add syllable nuclei to textgrids 67 | for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True): 68 | tg = textgrid.openTextgrid( 69 | join(tgPath, name + ".TextGrid"), includeEmptyIntervals=False 70 | ) 71 | entryList = tg.tierDict[tierName].entryList 72 | startTimeList = [entry[0] for entry in entryList] 73 | nucleusSyllableList = uwe_sr.toAbsoluteTime( 74 | name, syllableNucleiPath, startTimeList 75 | ) 76 | flattenedSyllableList = [ 77 | nuclei for sublist in nucleusSyllableList for nuclei in sublist 78 | ] 79 | wavFN = join(wavPath, name + ".wav") 80 | duration = audio_scripts.getSoundFileDuration(wavFN) 81 | 82 | oom = my_math.orderOfMagnitude(len(flattenedSyllableList)) 83 | labelTemplate = "%%0%dd" % (oom + 1) 84 | 85 | entryList = [ 86 | (timestamp, labelTemplate % i) 87 | for i, timestamp in enumerate(flattenedSyllableList) 88 | ] 89 | print(flattenedSyllableList) 90 | tier = textgrid.PointTier("Syllable Nuclei", entryList, 0, duration) 91 | 92 | tgFN = join(tgPath, name + ".TextGrid") 93 | tg = textgrid.openTextgrid(tgFN, includeEmptyIntervals=False) 94 | tg.addTier(tier) 95 | tg.save( 96 | join(outputPath, name + ".TextGrid"), 97 | format="short_textgrid", 98 | includeBlankSpaces=True, 99 | ) 100 | 101 | 102 | def _calculateSyllablesPerSecond(wavPath, syllableNucleiPath): 103 | for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True): 104 | nucleusSyllableList = uwe_sr.toAbsoluteTime( 105 | name, 106 | syllableNucleiPath, 107 | [ 108 | 0, 109 | ], 110 | ) 111 | nucleusSyllableList = [ 112 | nucleus for subList in nucleusSyllableList for nucleus in subList 113 | ] 114 | numSyllables = len(nucleusSyllableList) 115 | wavFN = join(wavPath, name + ".wav") 116 | duration = audio_scripts.getSoundFileDuration(wavFN) 117 | 118 | print("%s - %.02f syllables/second" % (name, numSyllables / float(duration))) 119 | 120 | 121 | def _calculateSyllablesPerSecondForIntervals( 122 | wavPath, tgPath, tierName, syllableNucleiPath 123 | ): 124 | # Calculate the speech rate for each interval in the textgrids 125 | for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True): 126 | tg = textgrid.openTextgrid( 127 | join(tgPath, name + ".TextGrid"), includeEmptyIntervals=False 128 | ) 129 | entryList = tg.tierDict[tierName].entryList 130 | startTimeList = [entry[0] for entry in entryList] 131 | nucleusSyllableList = uwe_sr.toAbsoluteTime( 132 | name, syllableNucleiPath, startTimeList 133 | ) 134 | 135 | speechRateList = [] 136 | for intervalList, entry in utils.safeZip( 137 | [nucleusSyllableList, entryList], enforceLength=True 138 | ): 139 | start, stop = entry[0], entry[1] 140 | syllablesPerSecond = len(intervalList) / (stop - start) 141 | speechRateList.append(str(syllablesPerSecond)) 142 | 143 | print( 144 | "%s - %s (syllables/second for each interval)" 145 | % (name, ",".join(speechRateList)) 146 | ) 147 | 148 | 149 | def markupTextgridWithSyllableNuclei( 150 | wavPath, 151 | tgPath, 152 | tierName, 153 | wavTmpPath, 154 | syllableNucleiPath, 155 | matlabEXE, 156 | matlabScriptsPath, 157 | outputPath, 158 | printCmd=True, 159 | outputTGFlag=False, 160 | ): 161 | utils.makeDir(outputPath) 162 | 163 | # This call can be commented out if you run the matlab code directly instead; 164 | # in that case, start from the next line 165 | _runSpeechRateEstimateOnIntervals( 166 | wavPath, 167 | tgPath, 168 | tierName, 169 | wavTmpPath, 170 | syllableNucleiPath, 171 | matlabEXE, 172 | matlabScriptsPath, 173 | printCmd, 174 | outputTGFlag, 175 | ) 176 | 177 | _addSyllableNucleiToTextgrids( 178 | wavPath, tgPath, tierName, syllableNucleiPath, outputPath 179 | ) 180 | 181 | _calculateSyllablesPerSecondForIntervals( 182 | wavPath, tgPath, tierName, syllableNucleiPath 183 | ) 184 | 185 | 186 | def getSpeechRateForIntervals( 187 | wavPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd=True 188 | ): 189 | # This call can be commented out if you run the matlab code directly instead; 190 | # in that case, start from the next line 191 | _runSpeechRateEstimate( 192 | wavPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd 193 | ) 194 | 195 | _calculateSyllablesPerSecond(wavPath, syllableNucleiPath) 196 | 197 | 198 | if __name__ == "__main__": 199 | _rootDir = "/Users/tmahrt/Dropbox/workspace/pyAcoustics/examples/files" 200 | _wavPath = _rootDir 201 | _syllableNucleiPath = join(_rootDir, "syllableNuclei_portions") 202 | _matlabEXE = "/Applications/MATLAB_R2014a.app/bin/matlab" 203 | _matlabScriptsPath = "/Users/tmahrt/Dropbox/workspace/pyAcoustics/" "matlabScripts" 204 | 205 | # getSpeechRateForIntervals(_wavPath, _syllableNucleiPath, _matlabEXE, 206 | # _matlabScriptsPath) 207 | 208 | _wavTmpPath = join(_wavPath,
"subset_wav_files") 209 | _tgPath = _rootDir 210 | _tierName = "utterances" 211 | _syllableNucleiPath = join(_rootDir, "syllableNuclei_whole") 212 | _outputPath = join(_rootDir, "textgrids_w_syllable_nucleus_markings") 213 | 214 | markupTextgridWithSyllableNuclei( 215 | _wavPath, 216 | _tgPath, 217 | _tierName, 218 | _wavTmpPath, 219 | _syllableNucleiPath, 220 | _matlabEXE, 221 | _matlabScriptsPath, 222 | _outputPath, 223 | ) 224 | -------------------------------------------------------------------------------- /examples/files/introduction.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile short" 2 | "TextGrid" 3 | 4 | 0.0 5 | 17.516375 6 | 7 | 3 8 | "IntervalTier" 9 | "utterances" 10 | 0.0 11 | 17.516375 12 | 17 13 | 0.0 14 | 0.181470716579 15 | "" 16 | 0.181470716579 17 | 0.687915993749 18 | "こんにちは" 19 | 0.687915993749 20 | 1.30596645968 21 | "" 22 | 1.30596645968 23 | 3.32877817967 24 | "私は、ティム·マートです" 25 | 3.32877817967 26 | 3.93472274005 27 | "" 28 | 3.93472274005 29 | 5.33896478585 30 | "私は学生です" 31 | 5.33896478585 32 | 6.04797904058 33 | "" 34 | 6.04797904058 35 | 7.67648659987 36 | "日本語を勉強します。" 37 | 7.67648659987 38 | 8.03355270487 39 | "" 40 | 8.03355270487 41 | 10.2762026369 42 | "この夏は日本にいきました" 43 | 10.2762026369 44 | 10.8441923856 45 | "" 46 | 10.8441923856 47 | 13.7353684085 48 | "東京行きましたも四国行きました" 49 | 13.7353684085 50 | 14.3480198798 51 | "" 52 | 14.3480198798 53 | 15.186091973 54 | "楽しかった" 55 | 15.186091973 56 | 15.9461478394 57 | "" 58 | 15.9461478394 59 | 17.4539965371 60 | "ラーメン大好きです" 61 | 17.4539965371 62 | 17.516375 63 | "" 64 | "IntervalTier" 65 | "words" 66 | 0.0 67 | 17.516375 68 | 21 69 | 0.0 70 | 0.181470716579 71 | "" 72 | 0.181470716579 73 | 0.687915993749 74 | "こんにちは" 75 | 0.687915993749 76 | 1.30596645968 77 | "" 78 | 1.30596645968 79 | 2.02596645968 80 | "私は" 81 | 2.02596645968 82 | 3.32877817967 83 | "ティムマートです" 84 | 3.32877817967 85 | 3.93472274005 86 | "" 87 | 3.93472274005 88 | 5.33896478585 89 | "私は学生です" 90 | 5.33896478585 91 | 6.04797904058 92 | "" 93 | 6.04797904058 94 | 7.67648659987 95 | "日本語を勉強します" 96 | 7.67648659987 97 | 8.03355270487 98 | "" 99 | 8.03355270487 100 | 8.22355270487 101 | "この" 102 | 8.22355270487 103 | 8.90355270487 104 | "夏は" 105 | 8.90355270487 106 | 10.2762026369 107 | "日本にいきました" 108 | 10.2762026369 109 | 10.8441923856 110 | "" 111 | 10.8441923856 112 | 12.5541923856 113 | "東京行きましたも" 114 | 12.5541923856 115 | 13.7353684085 116 | "四国行きました" 117 | 13.7353684085 118 | 14.3480198798 119 | "" 120 | 14.3480198798 121 | 15.186091973 122 | "楽しかった" 123 | 15.186091973 124 | 15.9461478394 125 | "" 126 | 15.9461478394 127 | 17.4539965371 128 | "ラーメン大好きです" 129 | 17.4539965371 130 | 17.516375 131 | "" 132 | "IntervalTier" 133 | "phones" 134 | 0.0 135 | 17.516375 136 | 159 137 | 0.0 138 | 0.181470716579 139 | "" 140 | 0.181470716579 141 | 0.201470716579 142 | "k" 143 | 0.201470716579 144 | 0.231470716579 145 | "o" 146 | 0.231470716579 147 | 0.261470716579 148 | "N" 149 | 0.261470716579 150 | 0.291470716579 151 | "n" 152 | 0.291470716579 153 | 0.361470716579 154 | "i" 155 | 0.361470716579 156 | 0.421470716579 157 | "ch" 158 | 0.421470716579 159 | 0.451470716579 160 | "i" 161 | 0.451470716579 162 | 0.541470716579 163 | "w" 164 | 0.541470716579 165 | 0.687915993749 166 | "a" 167 | 0.687915993749 168 | 1.30596645968 169 | "" 170 | 1.30596645968 171 | 1.32596645968 172 | "w" 173 | 1.32596645968 174 | 1.35596645968 175 | "a" 176 | 1.35596645968 177 | 1.40596645968 178 | "t" 179 | 
1.40596645968 180 | 1.45596645968 181 | "a" 182 | 1.45596645968 183 | 1.57596645968 184 | "sh" 185 | 1.57596645968 186 | 1.66596645968 187 | "i" 188 | 1.66596645968 189 | 1.88596645968 190 | "w" 191 | 1.88596645968 192 | 2.02596645968 193 | "a" 194 | 2.02596645968 195 | 2.15596645968 196 | "t" 197 | 2.15596645968 198 | 2.28596645968 199 | "i" 200 | 2.28596645968 201 | 2.32596645968 202 | "m" 203 | 2.32596645968 204 | 2.51596645968 205 | "u" 206 | 2.51596645968 207 | 2.69596645968 208 | "m" 209 | 2.69596645968 210 | 2.82596645968 211 | "a:" 212 | 2.82596645968 213 | 2.88596645968 214 | "t" 215 | 2.88596645968 216 | 2.92596645968 217 | "o" 218 | 2.92596645968 219 | 2.98596645968 220 | "d" 221 | 2.98596645968 222 | 3.05596645968 223 | "e" 224 | 3.05596645968 225 | 3.26596645968 226 | "s" 227 | 3.26596645968 228 | 3.32877817967 229 | "u" 230 | 3.32877817967 231 | 3.93472274005 232 | "" 233 | 3.93472274005 234 | 3.95472274005 235 | "w" 236 | 3.95472274005 237 | 3.98472274005 238 | "a" 239 | 3.98472274005 240 | 4.02472274005 241 | "t" 242 | 4.02472274005 243 | 4.06472274005 244 | "a" 245 | 4.06472274005 246 | 4.14472274005 247 | "sh" 248 | 4.14472274005 249 | 4.18472274005 250 | "i" 251 | 4.18472274005 252 | 4.36472274005 253 | "w" 254 | 4.36472274005 255 | 4.49472274005 256 | "a" 257 | 4.49472274005 258 | 4.64472274005 259 | "g" 260 | 4.64472274005 261 | 4.70472274005 262 | "a" 263 | 4.70472274005 264 | 4.73472274005 265 | "k" 266 | 4.73472274005 267 | 4.76472274005 268 | "u" 269 | 4.76472274005 270 | 4.89472274005 271 | "s" 272 | 4.89472274005 273 | 4.93472274005 274 | "e" 275 | 4.93472274005 276 | 4.96472274005 277 | "i" 278 | 4.96472274005 279 | 5.02472274005 280 | "d" 281 | 5.02472274005 282 | 5.11472274005 283 | "e" 284 | 5.11472274005 285 | 5.27472274005 286 | "s" 287 | 5.27472274005 288 | 5.33896478585 289 | "u" 290 | 5.33896478585 291 | 6.04797904058 292 | "" 293 | 6.04797904058 294 | 6.06797904058 295 | "n" 296 | 6.06797904058 297 | 6.10797904058 298 | "i" 299 | 6.10797904058 300 | 6.16797904058 301 | "h" 302 | 6.16797904058 303 | 6.22797904058 304 | "o" 305 | 6.22797904058 306 | 6.28797904058 307 | "N" 308 | 6.28797904058 309 | 6.34797904058 310 | "g" 311 | 6.34797904058 312 | 6.37797904058 313 | "o" 314 | 6.37797904058 315 | 6.40797904058 316 | "w" 317 | 6.40797904058 318 | 6.72797904058 319 | "o" 320 | 6.72797904058 321 | 6.84797904058 322 | "b" 323 | 6.84797904058 324 | 6.92797904058 325 | "e" 326 | 6.92797904058 327 | 6.95797904058 328 | "N" 329 | 6.95797904058 330 | 7.06797904058 331 | "ky" 332 | 7.06797904058 333 | 7.10797904058 334 | "o:" 335 | 7.10797904058 336 | 7.22797904058 337 | "sh" 338 | 7.22797904058 339 | 7.25797904058 340 | "i" 341 | 7.25797904058 342 | 7.30797904058 343 | "m" 344 | 7.30797904058 345 | 7.41797904058 346 | "a" 347 | 7.41797904058 348 | 7.61797904058 349 | "s" 350 | 7.61797904058 351 | 7.67648659987 352 | "u" 353 | 7.67648659987 354 | 8.03355270487 355 | "" 356 | 8.03355270487 357 | 8.07355270487 358 | "k" 359 | 8.07355270487 360 | 8.10355270487 361 | "o" 362 | 8.10355270487 363 | 8.14355270487 364 | "n" 365 | 8.14355270487 366 | 8.22355270487 367 | "o" 368 | 8.22355270487 369 | 8.25355270487 370 | "n" 371 | 8.25355270487 372 | 8.34355270487 373 | "a" 374 | 8.34355270487 375 | 8.44355270487 376 | "ts" 377 | 8.44355270487 378 | 8.47355270487 379 | "u" 380 | 8.47355270487 381 | 8.63355270487 382 | "w" 383 | 8.63355270487 384 | 8.90355270487 385 | "a" 386 | 8.90355270487 387 | 8.99355270487 388 | "n" 389 | 8.99355270487 390 | 9.08355270487 391 | "i" 392 | 
9.08355270487 393 | 9.11355270487 394 | "p" 395 | 9.11355270487 396 | 9.18355270487 397 | "o" 398 | 9.18355270487 399 | 9.30355270487 400 | "N" 401 | 9.30355270487 402 | 9.35355270487 403 | "n" 404 | 9.35355270487 405 | 9.66355270487 406 | "i" 407 | 9.66355270487 408 | 9.74355270487 409 | "i" 410 | 9.74355270487 411 | 9.81355270487 412 | "k" 413 | 9.81355270487 414 | 9.85355270487 415 | "i" 416 | 9.85355270487 417 | 9.91355270487 418 | "m" 419 | 9.91355270487 420 | 9.96355270487 421 | "a" 422 | 9.96355270487 423 | 10.0435527049 424 | "sh" 425 | 10.0435527049 426 | 10.0735527049 427 | "i" 428 | 10.0735527049 429 | 10.1035527049 430 | "t" 431 | 10.1035527049 432 | 10.2762026369 433 | "a" 434 | 10.2762026369 435 | 10.8441923856 436 | "" 437 | 10.8441923856 438 | 10.8741923856 439 | "t" 440 | 10.8741923856 441 | 10.9741923856 442 | "o:" 443 | 10.9741923856 444 | 11.0741923856 445 | "ky" 446 | 11.0741923856 447 | 11.2241923856 448 | "o:" 449 | 11.2241923856 450 | 11.2541923856 451 | "i" 452 | 11.2541923856 453 | 11.3641923856 454 | "k" 455 | 11.3641923856 456 | 11.5241923856 457 | "i" 458 | 11.5241923856 459 | 11.5941923856 460 | "m" 461 | 11.5941923856 462 | 11.6541923856 463 | "a" 464 | 11.6541923856 465 | 11.7341923856 466 | "sh" 467 | 11.7341923856 468 | 11.7641923856 469 | "i" 470 | 11.7641923856 471 | 11.8041923856 472 | "t" 473 | 11.8041923856 474 | 12.0441923856 475 | "a" 476 | 12.0441923856 477 | 12.1441923856 478 | "m" 479 | 12.1441923856 480 | 12.5541923856 481 | "o" 482 | 12.5541923856 483 | 12.6741923856 484 | "sh" 485 | 12.6741923856 486 | 12.7041923856 487 | "i" 488 | 12.7041923856 489 | 12.8141923856 490 | "k" 491 | 12.8141923856 492 | 12.9141923856 493 | "o" 494 | 12.9141923856 495 | 12.9741923856 496 | "k" 497 | 12.9741923856 498 | 13.1041923856 499 | "u" 500 | 13.1041923856 501 | 13.1941923856 502 | "i" 503 | 13.1941923856 504 | 13.2241923856 505 | "k" 506 | 13.2241923856 507 | 13.2741923856 508 | "i" 509 | 13.2741923856 510 | 13.3141923856 511 | "m" 512 | 13.3141923856 513 | 13.3841923856 514 | "a" 515 | 13.3841923856 516 | 13.4741923856 517 | "sh" 518 | 13.4741923856 519 | 13.5041923856 520 | "i" 521 | 13.5041923856 522 | 13.5441923856 523 | "t" 524 | 13.5441923856 525 | 13.7353684085 526 | "a" 527 | 13.7353684085 528 | 14.3480198798 529 | "" 530 | 14.3480198798 531 | 14.3680198798 532 | "t" 533 | 14.3680198798 534 | 14.4280198798 535 | "a" 536 | 14.4280198798 537 | 14.4880198798 538 | "n" 539 | 14.4880198798 540 | 14.5480198798 541 | "o" 542 | 14.5480198798 543 | 14.6880198798 544 | "sh" 545 | 14.6880198798 546 | 14.8180198798 547 | "i" 548 | 14.8180198798 549 | 14.8780198798 550 | "k" 551 | 14.8780198798 552 | 14.9880198798 553 | "a" 554 | 14.9880198798 555 | 15.0180198798 556 | "t" 557 | 15.0180198798 558 | 15.186091973 559 | "a" 560 | 15.186091973 561 | 15.9461478394 562 | "" 563 | 15.9461478394 564 | 16.0461478394 565 | "r" 566 | 16.0461478394 567 | 16.1561478394 568 | "a:" 569 | 16.1561478394 570 | 16.2861478394 571 | "m" 572 | 16.2861478394 573 | 16.3261478394 574 | "e" 575 | 16.3261478394 576 | 16.6161478394 577 | "N" 578 | 16.6161478394 579 | 16.7661478394 580 | "d" 581 | 16.7661478394 582 | 16.8461478394 583 | "a" 584 | 16.8461478394 585 | 16.8961478394 586 | "i" 587 | 16.8961478394 588 | 16.9661478394 589 | "s" 590 | 16.9661478394 591 | 16.9961478394 592 | "u" 593 | 16.9961478394 594 | 17.0261478394 595 | "k" 596 | 17.0261478394 597 | 17.1161478394 598 | "i" 599 | 17.1161478394 600 | 17.1561478394 601 | "d" 602 | 17.1561478394 603 | 17.2361478394 604 | "e" 605 | 
17.2361478394 606 | 17.3961478394 607 | "s" 608 | 17.3961478394 609 | 17.4539965371 610 | "u" 611 | 17.4539965371 612 | 17.516375 613 | "" 614 | -------------------------------------------------------------------------------- /examples/files/introduction.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/examples/files/introduction.wav -------------------------------------------------------------------------------- /examples/files/tone_split_data.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 16.8040625 6 | 7 | 1 8 | "IntervalTier" 9 | "utterances" 10 | 0 11 | 16.8040625 12 | 9 13 | 0 14 | 2.144973011908906 15 | "" 16 | 2.144973011908906 17 | 3.7520176961273086 18 | "01" 19 | 3.7520176961273086 20 | 6.400665416413194 21 | "" 22 | 6.400665416413194 23 | 7.724989276556136 24 | "02" 25 | 7.724989276556136 26 | 10.090916172766562 27 | "" 28 | 10.090916172766562 29 | 11.802121160591712 30 | "03" 31 | 11.802121160591712 32 | 14.361488620643241 33 | "" 34 | 14.361488620643241 35 | 15.98341334823404 36 | "04" 37 | 15.98341334823404 38 | 16.8040625 39 | "" 40 | -------------------------------------------------------------------------------- /examples/files/tone_split_data.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/examples/files/tone_split_data.wav -------------------------------------------------------------------------------- /examples/frequency.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Apr 16, 2018 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import io 8 | from os.path import join 9 | from pyacoustics.text import frequency 10 | 11 | rootPath = r"/Users/tmahrt/Dropbox/workspace/pyAcoustics/resources" 12 | outputFN = r"/Users/tmahrt/Desktop/buckeye_frequency_counts.csv" 13 | 14 | buckeye = frequency.Buckeye(join(rootPath, "buckeye_counts.txt")) 15 | fischer = frequency.Fischer(join(rootPath, "fischer_counts.txt")) 16 | # google = frequency.GoogleUnigram(join(rootPath, "google.letter.unigram")) # Too large to include? License issue? 
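# A note on getFrequency(), used below: it returns the word's frequency data as a
# sequence (a raw count first, judging by the summing of [0] into sumV below), with
# outOfDictionaryValue standing in for words that a corpus does not contain.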
17 | switchboard = frequency.SwitchboardTim(join(rootPath, "switchboard_counts.txt")) 18 | 19 | outputList = [] 20 | wordList = list(buckeye.frequencyDict.keys()) 21 | wordList.sort() 22 | sumV = 0 23 | for word in wordList: 24 | 25 | # Not including words that were tagged for any reason 26 | if word[0] == "[": 27 | continue 28 | 29 | sumV += buckeye.getFrequency(word, outOfDictionaryValue=0)[0] 30 | 31 | row = [ 32 | word, 33 | ] 34 | for corpus in [ 35 | buckeye, 36 | fischer, 37 | # google, 38 | switchboard, 39 | ]: 40 | row.extend(corpus.getFrequency(word, outOfDictionaryValue="")) 41 | 42 | rowTxt = ",".join([str(val) for val in row]) 43 | outputList.append(rowTxt) 44 | 45 | outputTxt = u"\n".join(outputList) 46 | with io.open(outputFN, "w") as fd: 47 | fd.write(outputTxt) 48 | 49 | print(sumV) 50 | -------------------------------------------------------------------------------- /examples/split_audio_on_silence.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | 4 | import datetime 5 | 6 | now = datetime.datetime.now 7 | 8 | from praatio import pitch_and_intensity 9 | 10 | from pyacoustics.speech_detection import naive_vad 11 | from pyacoustics.signals import audio_scripts 12 | from pyacoustics.signals import data_fitting 13 | from pyacoustics.utilities import utils 14 | from pyacoustics.utilities import my_math 15 | 16 | from praatio import textgrid 17 | 18 | 19 | def audiosplitSilence( 20 | inputPath, 21 | fn, 22 | tgPath, 23 | pitchPath, 24 | subwavPath, 25 | minPitch, 26 | maxPitch, 27 | stepSize, 28 | numSteps, 29 | praatEXE, 30 | praatScriptPath, 31 | generateWavs=False, 32 | numSegmentsToExtract=None, 33 | ): 34 | """ 35 | Extract the non-silence portions of a file 36 | 37 | minPitch - the speaker's minimum pitch 38 | maxPitch - the speaker's maximum pitch 39 | intensityPercentile - (used by the commented-out alternative below) Given 40 | the distribution of intensity values in a file, the 41 | intensity threshold to use is the one that falls at 42 | /intensityPercentile/. Any intensity values less than 43 | the intensity threshold will be considered silence. 44 | I typically use a value between 0.2 and 0.3. 45 | stepSize - non-overlapping step size (in seconds) 46 | numSteps - number of consecutive blocks needed for a segment to be 47 | considered silence 48 | stepSize * numSteps is the smallest possible interval that 49 | can be considered silence/not-silence. 50 | praatEXE - full path to a praat executable. On Windows use praatcon.exe. 51 | Other systems use praat 52 | praatScriptPath - location of the folder containing praat scripts that 53 | is distributed with pyAcoustics 54 | numSegmentsToExtract - if not None remove all but the X loudest segments as 55 | specified by /numSegmentsToExtract/. Otherwise, 56 | all non-silent segments are kept.
57 | generateWavs - if False, no wavefiles are extracted, but you can look at 58 | the generated textgrids to see which wavefiles would have 59 | been extracted 60 | """ 61 | utils.makeDir(tgPath) 62 | utils.makeDir(pitchPath) 63 | utils.makeDir(subwavPath) 64 | 65 | name = os.path.splitext(fn)[0] 66 | 67 | piSamplingRate = 100 # Samples per second 68 | sampleStep = 1 / float(piSamplingRate) 69 | outputFN = os.path.splitext(fn)[0] + ".txt" 70 | motherPIList = pitch_and_intensity.extractPI( 71 | join(inputPath, fn), 72 | join(pitchPath, outputFN), 73 | praatEXE, 74 | minPitch, 75 | maxPitch, 76 | sampleStep=sampleStep, 77 | forceRegenerate=False, 78 | ) 79 | 80 | # entry = (time, pitchVal, intVal) 81 | motherPIList = [float(entry[2]) for entry in motherPIList] 82 | 83 | # We need the intensity threshold to distinguish silence from speech/noise 84 | # Naively, we can extract this by getting the nth percent most intense 85 | # sound in the file naive_vad.getIntensityPercentile() 86 | # (but then, how do we determine the percent?) 87 | # Alternatively, we could consider the set of intensity values to be 88 | # bimodal -- silent values vs non-silent. The best threshold is the one 89 | # that minimizes the overlap between the two distributions, obtained via 90 | # data_fitting.getBimodalValley() 91 | # silenceThreshold = naive_vad.getIntensityPercentile(motherPIList, 92 | # intensityPercentile) 93 | silenceThreshold = data_fitting.getBimodalValley(motherPIList, doplot=True) 94 | print(silenceThreshold) 95 | entryList = naive_vad.naiveVAD( 96 | motherPIList, silenceThreshold, piSamplingRate, stepSize, numSteps 97 | ) 98 | entryList = [(time[0], time[1], str(i)) for i, time in enumerate(entryList)] 99 | 100 | # Filter out quieter sounds if necessary 101 | if numSegmentsToExtract is not None: 102 | # Get the rms energy of each non-silent region 103 | rmsEntryList = [] 104 | for i, entry in enumerate(entryList): 105 | intList = motherPIList[ 106 | int(entry[0] * piSamplingRate) : int(entry[1] * piSamplingRate) 107 | ] 108 | 109 | rmsVal = my_math.rms(intList) 110 | rmsEntryList.append((rmsVal, entry)) 111 | 112 | rmsEntryList.sort(reverse=True) # Sort by energy, loudest first 113 | entryList = [rmsTuple[1] for rmsTuple in rmsEntryList[:numSegmentsToExtract]] 114 | entryList.sort() # Sort by time 115 | 116 | # Create the textgrid 117 | tg = textgrid.Textgrid() 118 | duration = audio_scripts.getSoundFileDuration(join(inputPath, fn)) 119 | tier = textgrid.IntervalTier("speech_tier", entryList, 0, duration) 120 | tg.addTier(tier) 121 | tg.save( 122 | join(tgPath, name + ".TextGrid"), 123 | format="short_textgrid", 124 | includeBlankSpaces=True, 125 | ) 126 | 127 | if generateWavs is True: 128 | for i, entry in enumerate(entryList): 129 | subwavOutputFN = join(subwavPath, name + "_" + str(i) + ".wav") 130 | audio_scripts.extractSubwav( 131 | join(inputPath, fn), 132 | subwavOutputFN, 133 | entry[0], 134 | entry[1], 135 | singleChannelFlag=True, 136 | ) 137 | 138 | 139 | if __name__ == "__main__": 140 | _minPitch = 50 141 | _maxPitch = 450 142 | _intensityPercentile = 0.3 143 | _stepSize = 0.1 144 | _numSteps = 5 145 | 146 | _fn = "introduction.wav" 147 | _dataPath = join("/Users/tmahrt/Dropbox/workspace/pyAcoustics/examples/files") 148 | _outputPath = join(_dataPath, "output_stepSize_0.1") 149 | _tgPath = join(_dataPath, "splitAudio_silence_stepSize_0.1") 150 | _pitchPath = join(_dataPath, "pitch") 151 | _wavOutputPath = join(_dataPath, "output_wavs") 152 | _praatEXE = "/Applications/praat.App/Contents/MacOS/Praat" 153 |
_praatScriptPath = "/Users/tmahrt/Dropbox/workspace/pyAcoustics/" "praatScripts" 154 | utils.makeDir(_wavOutputPath) 155 | _rootFolderName = os.path.splitext(os.path.split(_fn)[1])[0] 156 | _subwavOutputPath = join(_wavOutputPath, _rootFolderName) 157 | audiosplitSilence( 158 | _dataPath, 159 | _fn, 160 | _tgPath, 161 | _pitchPath, 162 | _subwavOutputPath, 163 | _minPitch, 164 | _maxPitch, 165 | _stepSize, 166 | _numSteps, 167 | _praatEXE, 168 | _praatScriptPath, 169 | ) 170 | 171 | # Changing the parameters used in silence detection can lead to 172 | # very different results 173 | _stepSize = 0.025 174 | _numSteps = 10 175 | _tgPath = join(_dataPath, "splitAudio_silence_stepSize_0.025") 176 | audiosplitSilence( 177 | _dataPath, 178 | _fn, 179 | _tgPath, 180 | _pitchPath, 181 | _subwavOutputPath, 182 | _minPitch, 183 | _maxPitch, 184 | _stepSize, 185 | _numSteps, 186 | _praatEXE, 187 | _praatScriptPath, 188 | ) 189 | -------------------------------------------------------------------------------- /examples/split_audio_on_tone.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | 4 | from praatio import textgrid 5 | from praatio import pitch_and_intensity 6 | 7 | from pyacoustics.speech_detection import split_on_tone 8 | from pyacoustics.utilities import utils 9 | from pyacoustics.signals import audio_scripts 10 | 11 | 12 | def audiosplitOnTone( 13 | inputPath, 14 | fn, 15 | pitchPath, 16 | tgPath, 17 | subwavPath, 18 | minPitch, 19 | maxPitch, 20 | toneFrequency, 21 | minEventDuration, 22 | praatEXE, 23 | praatScriptPath, 24 | forceRegen, 25 | generateWavs=False, 26 | ): 27 | utils.makeDir(pitchPath) 28 | utils.makeDir(tgPath) 29 | utils.makeDir(subwavPath) 30 | 31 | name = os.path.splitext(fn)[0] 32 | piSamplingRate = 100 # Samples per second 33 | 34 | # Extract pitch and find patterns in the file 35 | outputFN = os.path.splitext(fn)[0] + ".txt" 36 | sampleStep = 1 / float(piSamplingRate) 37 | motherPIList = pitch_and_intensity.extractPI( 38 | join(inputPath, fn), 39 | join(pitchPath, outputFN), 40 | praatEXE, 41 | minPitch, 42 | maxPitch, 43 | sampleStep=sampleStep, 44 | forceRegenerate=forceRegen, 45 | undefinedValue=0.0, 46 | ) 47 | # entry = (time, pitchVal, intVal) 48 | pitchList = [float(entry[1]) for entry in motherPIList] 49 | timeDict = split_on_tone.splitFileOnTone( 50 | pitchList, piSamplingRate, toneFrequency, minEventDuration 51 | ) 52 | 53 | # Output result as textgrid 54 | duration = audio_scripts.getSoundFileDuration(join(inputPath, fn)) 55 | tg = textgrid.Textgrid() 56 | for key in ["beep", "speech", "silence"]: 57 | entryList = timeDict[key] 58 | tier = textgrid.IntervalTier(key, entryList, 0, duration) 59 | tg.addTier(tier) 60 | tg.save( 61 | join(tgPath, name + ".TextGrid"), 62 | format="short_textgrid", 63 | includeBlankSpaces=True, 64 | ) 65 | 66 | # Output audio portions between tones 67 | if generateWavs: 68 | split_on_tone.extractSubwavs(timeDict, inputPath, fn, subwavPath) 69 | 70 | 71 | if __name__ == "__main__": 72 | _dataPath = "/Users/tmahrt/Dropbox/workspace/pyAcoustics/examples/files" 73 | _pitchPath = join(_dataPath, "split_on_tone_pitch") 74 | _tgPath = join(_dataPath, "split_on_tone_textgrids") 75 | _wavOutputPath = join(_dataPath, "split_on_tone_subwavs") 76 | _fn = "tone_split_data.wav" 77 | _minPitch = 50 78 | _maxPitch = 450 79 | _toneFrequency = 330 # Actual frequency is 333 80 | _minEventDuration = 0.2 81 | _forceRegeneratePitch = False 82 | _generateWavs = True 83 | 84 | 
_praatEXE = "/Applications/praat.App/Contents/MacOS/Praat" 85 | _praatScriptPath = "/Users/tmahrt/Dropbox/workspace/pyAcoustics/" "praatScripts" 86 | 87 | audiosplitOnTone( 88 | _dataPath, 89 | _fn, 90 | _pitchPath, 91 | _tgPath, 92 | _wavOutputPath, 93 | _minPitch, 94 | _maxPitch, 95 | _toneFrequency, 96 | _minEventDuration, 97 | _praatEXE, 98 | _praatScriptPath, 99 | _forceRegeneratePitch, 100 | _generateWavs, 101 | ) 102 | 103 | # Let's try the same code with an incorrect tone frequency 104 | _toneFrequency = 500 105 | _tgPath = join(_dataPath, "split_on_tone_textgrids_500hz_tone") 106 | _generateWavs = False 107 | 108 | audiosplitOnTone( 109 | _dataPath, 110 | _fn, 111 | _pitchPath, 112 | _tgPath, 113 | _wavOutputPath, 114 | _minPitch, 115 | _maxPitch, 116 | _toneFrequency, 117 | _minEventDuration, 118 | _praatEXE, 119 | _praatScriptPath, 120 | _forceRegeneratePitch, 121 | _generateWavs, 122 | ) 123 | -------------------------------------------------------------------------------- /matlabScripts/detect_syllable_nuclei.m: -------------------------------------------------------------------------------- 1 | % Bootstrap script for Uwe Reichel's nucleus detection. Written by Tim Mahrt 2 | function[] = detect_syllable_nuclei(path_to_files, output_path) 3 | 4 | files = dir(fullfile(path_to_files,'*.wav')); 5 | for file = files' 6 | [tossPath, name, tossExt] = fileparts(file.name); 7 | 8 | [y fs] = audioread(fullfile(path_to_files, file.name)); 9 | opt.fs = fs; 10 | opt.verbose = 0; 11 | sn = fu_sylncl(y,opt); 12 | 13 | sn = sn ./ fs; % Get the timestamps in seconds 14 | 15 | output_fn = fullfile(output_path,strcat(name,'.txt')); 16 | fd = fopen(output_fn,'w'); 17 | fprintf(fd,'%f\n',sn); 18 | fclose(fd); 19 | end 20 | 21 | end 22 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/column2rowvec.m: -------------------------------------------------------------------------------- 1 | function v=column2rowvec(v) 2 | 3 | % transposition in case input is column vector 4 | 5 | if length(v(1,:))==1 6 | v=v'; 7 | end 8 | 9 | return 10 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_filter.m: -------------------------------------------------------------------------------- 1 | function sflt=fu_filter(s,t,gf,fs,o); 2 | 3 | %sflt=fu_filter(s,t,gf,fs); 4 | %s: signal vector 5 | %t: type 'high'|'low'|'stop'|'band' 6 | %gf: cutoff frequencies (1 value --> high- or low-pass, 2 values --> band-pass) 7 | %fs: sample frequency 8 | %o: order, default 5 9 | %applies butter filter 10 | %operates only if gf < fs/2 11 | 12 | fn=gf/(fs/2); 13 | 14 | if fn>=1 15 | sflt=s; 16 | return 17 | end 18 | 19 | if nargin < 5; o=5; end 20 | 21 | if strcmp(t,'band') 22 | [b a]=butter(o,fn); 23 | else 24 | [b a]=butter(o, fn, t); 25 | end 26 | 27 | sflt=filtfilt(b,a,s); 28 | 29 | if length(find(isnan(sflt)))>0 30 | disp('filtering not possible, returning original signal'); 31 | sflt=s; 32 | end 33 | 34 | %freqz(b,a,128,fs); 35 | %subplot(2,1,1) 36 | %x=32000:32000+fs; 37 | %plot(x,s(x),'-b') 38 | %subplot(2,1,2) 39 | %plot(x,sflt(x),'-b') 40 | %a=300000; 41 | %fhpt_play(sflt*a); 42 | 43 | return 44 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_i_window.m: -------------------------------------------------------------------------------- 1 | function wi = fu_i_window(i,wl,l) 2 | 3 | % wi = fu_i_window(i,wl,l) 4 | %
i: index in vector 5 | % wl: window length 6 | % l: vector length 7 | % wi: indices in window around i 8 | % - returns indices of window around index i in vector of length l 9 | % - if distance from i to end or beginning of vector is less than wl/2, 10 | % the window is shifted accordingly 11 | 12 | hwl=floor(wl/2); 13 | wi=max(i-hwl,1):min(i+hwl,l); 14 | 15 | % if window too short: trying to lengthen window to wanted size 16 | d=wl-length(wi); 17 | if d>0 18 | if wi(1)>1 19 | o=max(wi(1)-d,1); 20 | wi=o:wi(end); 21 | d=wl-length(wi); 22 | end 23 | if d>0 24 | if wi(end)<l 25 | wi=wi(1):min(wi(end)+d,l); 26 | end 27 | end 28 | end 29 | 30 | return 31 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_locmax.m: -------------------------------------------------------------------------------- 1 | function [pks idx]=fu_locmax(y,opt) 2 | 3 | %[pks idx]=fu_locmax(y,opt) 4 | %y: vector 5 | %opt: structure with fields 6 | % .smooth: 7 | % --> smoothing options, see fu_smooth 8 | % .mtd <'none'> 9 | % .order <1> 10 | % .peak.mph: <-Inf> min peak height 11 | % .th: <0> threshold; min difference of local peak to neighbors 12 | % .mpd: <1> min peak distance 13 | % .verbose.plot: <0>|1 14 | % .bw: <0>|1 15 | %pks: peak values 16 | %idx: their positions [sample] 17 | 18 | %% init 19 | if nargin<2; opt=struct; end 20 | 21 | opt=fu_optstruct_init(opt,{'smooth' 'peak'},{struct struct}); 22 | opt.smooth=fu_optstruct_init(opt.smooth,{'win' 'mtd' 'order'},{1 'none' 1}); 23 | opt.peak=fu_optstruct_init(opt.peak,{'mph' 'th' 'mpd'},{-Inf 0 1}); 24 | 25 | %% locmax 26 | opt.peak.mpd=min(opt.peak.mpd,length(y)-1); 27 | 28 | [pks idx] = findpeaks(fu_smooth(y,opt.smooth),... 29 | 'MINPEAKDISTANCE',opt.peak.mpd,'MINPEAKHEIGHT',opt.peak.mph,... 30 | 'THRESHOLD',opt.peak.th); 31 | 32 | % fallback 33 | if length(pks)==0 34 | [pks idx] = findpeaks(y); 35 | end 36 | 37 | % transpose to column vector since in 7.10.0 findpeaks() always returns 38 | % row vector! 39 | if size(y,2)==1 40 | pks=fu_r2c(pks); 41 | idx=fu_r2c(idx); 42 | end 43 | 44 | return 45 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_optstruct_init.m: -------------------------------------------------------------------------------- 1 | function opt = fu_optstruct_init(opt,optfields,optdefaults) 2 | 3 | %opt = fu_optstruct_init(opt,optfields,optdefaults) 4 | %initialisation of option structure OPT 5 | %assigns each field given in cell array OPTFIELDS with corresponding 6 | %default value given in cell array OPTDEFAULTS, whenever field is not 7 | %yet specified 8 | %if OPTDEFAULTS{i} is 'oblig', then optfields{i} must already have been set 9 | %by the user. If not, an error is raised.
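% example (field names are arbitrary):
%   opt = struct('fs',16000);
%   opt = fu_optstruct_init(opt,{'fs' 'verbose'},{8000 0});
%   --> opt.fs stays 16000 (already set by the user), opt.verbose becomes 0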
10 | 11 | for n=1:length(optfields) 12 | if ~isfield(opt,optfields{n}) 13 | if (~isnumeric(optdefaults{n}) & strcmp(optdefaults{n},'oblig')) 14 | error(sprintf('opt field "%s" has to be defined by the user!',optfields{n})); 15 | end 16 | opt=setfield(opt,optfields{n},optdefaults{n}); 17 | end 18 | end 19 | 20 | return 21 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_pause_detector.m: -------------------------------------------------------------------------------- 1 | function t = fu_pause_detector(s,opt); 2 | 3 | % t = fu_pause_detector(s,opt); 4 | % looks for pauses in signal according to criteria 5 | % specified in opt 6 | % input: s - signal vector 7 | % opt - structure with fields 8 | % .length: minimum length of pause in s 9 | % .rlength: length of reference window in s 10 | % .f_thresh: threshold factor (*rmse(reference_window)) 11 | % .fs: sample rate 12 | % .ret: <'s'>|'smpl' return values in seconds or samples 13 | % default (optimised on IMS radio news corpus, read speech, 14 | % by fminunc()): 15 | % opt.length = 0.1524; 16 | % opt.f_thresh = 0.0767; 17 | % opt.rlength = 5; 18 | % opt.fs = 16000; 19 | % output: t - matrix of pause time on- and offsets (in s) 20 | % algorithm: 21 | % - preprocessing: removing DC, low pass filtering (10kHz) 22 | % - window y with opt.length sec is moved over signal with stepsize 23 | % 0.05 s 24 | % - reference window rw with opt.rlength sec centered on y midpoint 25 | % is moved in parallel 26 | % - if rmse(rw) < rmse(global_signal)*opt.f_thresh 27 | % rw is set to global_signal (long pause assumed) 28 | % - if rmse(y) < rmse(rw)*opt.f_thresh 29 | % y is considered as a pause 30 | % Uwe Reichel, IPS (2009) 31 | 32 | 33 | %%%% defaults %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 34 | if nargin==1; opt=struct; end 35 | ofld={'f_thresh' 'length' 'rlength' 'fs' 'ret'}; 36 | odef={0.0767 0.1524 5 16000 's'}; 37 | opt=fu_optstruct_init(opt,ofld,odef); 38 | 39 | 40 | %%%% preprocessing %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 41 | % stereo->mono, mean 0 42 | s = s(:,1)-mean(s(:,1)); 43 | % low pass filtering (just carried out if fs > 20kHz) 44 | s = fu_filter(s,'low',10000,opt.fs); 45 | 46 | 47 | %%%% settings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 48 | % reference window span 49 | rws = floor(opt.rlength*opt.fs); 50 | % signal length 51 | ls=length(s); 52 | % min pause length in samples 53 | ml=floor(opt.length*opt.fs); 54 | % global rmse and pause threshold 55 | e_glob = fu_rmse(s); 56 | t_glob = opt.f_thresh*e_glob; 57 | % stepsize 58 | %sts=floor(ml/4); 59 | sts=max(1,floor(0.05*opt.fs)); 60 | stsh=floor(sts/2); % for centering of reference window 61 | 62 | 63 | %%%% pause detection %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 64 | % output array collecting pause sample indices 65 | t=[]; 66 | j=1; 67 | 68 | 69 | for i=1:sts:ls 70 | %%%% window %%%%%%%%%%%%%%%%%%%%%%%%%%%%% 71 | yi=i:min(ls,i+ml-1); 72 | %tt=[yi(1) yi(end)] 73 | y=s(yi); 74 | e_y = fu_rmse(y); 75 | %%%% reference window %%%%%%%%%%%%%%%%%%% 76 | rw=s(fu_i_window(min(i+stsh,ls),rws,ls)); 77 | e_rw=fu_rmse(rw); 78 | if (e_rw <= t_glob); e_rw=e_glob; end 79 | %%%% if rmse in window below threshold %% 80 | if e_y <= e_rw*opt.f_thresh 81 | if size(t,1)==j 82 | % values belong to already detected pause 83 | if yi(1) < t(j,2) 84 | t(j,2)=yi(end); 85 | else % new pause 86 | j=j+1; 87 | t(j,:)=[yi(1) yi(end)]; 88 | end 89 | else % new pause 90 | t(j,:)=[yi(1) yi(end)]; 91 | end 92 | end 93 | end 94 | 95 | 96 | %%%%%%
conversion of sample indices into %%%%%%%%%%%%%% 97 | %%%%%% time on- and offset values (sec) %%%%%%%%%%%%%%% 98 | 99 | if strcmp(opt.ret,'s'); t=t./opt.fs; end 100 | 101 | return 102 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_r2c.m: -------------------------------------------------------------------------------- 1 | function [v t]=fu_r2c(v) 2 | 3 | %v=fu_r2c(v) 4 | %[v t]=fu_r2c(v) 5 | %if V is a ROW VECTOR, it is transposed and T is set to 1 6 | %needed for uniform vector/matrix treatment in functions 7 | %operating on column vectors 8 | %see also fu_c2r, fu_transpose 9 | 10 | tb=0; 11 | % transpose row vector 12 | if size(v,1)==1 13 | v=v'; 14 | tb=1; 15 | end 16 | 17 | if nargout==2 18 | t=tb; 19 | end 20 | 21 | return 22 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_rmse.m: -------------------------------------------------------------------------------- 1 | function e = fu_rmse(x,y) 2 | 3 | %e = fu_rmse(x) 4 | %e = fu_rmse(x,y) 5 | %returns root mean squared error E between vector X and 0-line 6 | %or root mean squared error E between vectors X and Y 7 | 8 | if nargin < 2 9 | e=sqrt(sum(x.^2)/length(x)); 10 | else 11 | e=sqrt(sum((x-y).^2)/length(x)); 12 | end 13 | 14 | return 15 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_smooth.m: -------------------------------------------------------------------------------- 1 | function ys=fu_smooth(y,opt) 2 | 3 | %ys=fu_smooth(y,opt) 4 | %bracket for smoothing 5 | %faster but less flexible than fu_smoothing 6 | %y: vector 7 | % opt.mtd % as in the smooth() function (+ 'none') 8 | % .wl % window length 9 | % .order % polynomial order for sgolay 10 | 11 | 12 | if nargin<2; opt=struct; end 13 | opt=fu_optstruct_init(opt,{'mtd' 'wl' 'order'},{'mova' 5 3}); 14 | 15 | if strcmp(opt.mtd,'none') 16 | ys=y; 17 | elseif ~strcmp(opt.mtd,'sgolay') 18 | ys=smooth(y,opt.wl,opt.mtd); 19 | else 20 | ys=smooth(y,opt.wl,opt.mtd,opt.order); 21 | end 22 | 23 | return 24 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_smooth_binvec.m: -------------------------------------------------------------------------------- 1 | function vs = fu_smooth_binvec(v,l); 2 | 3 | % vs = fu_smooth_binvec(v,l); 4 | % v: binary vector 5 | % l: minimum length of 1- or 0-subsequences 6 | % vs: smoothed vector (short subsequences get same value as neighbors) 7 | % e.g.
v = [1 1 1 1 0 0 1 1 1 1]; 8 | % l = 3 9 | % --> vs = [1 1 1 1 1 1 1 1 1 1]; 10 | 11 | [vs tt] = fu_transp(v,'r'); 12 | 13 | vs = fu_smooth_binvec_sub(vs,1,l); 14 | vs = fu_smooth_binvec_sub(vs,0,l); 15 | 16 | vs=fu_transp(vs,tt); 17 | 18 | return 19 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_smooth_binvec_sub.m: -------------------------------------------------------------------------------- 1 | function v=fu_smooth_binvec_sub(v,b,l); 2 | 3 | %called by fu_smooth_binvec 4 | 5 | r = abs(b-1); 6 | i = find(v==b); 7 | if length(i)==0; return; end 8 | di = [1 diff(i)]; 9 | seq_i=[]; 10 | for j=1:length(di) 11 | if di(j)>1 12 | if length(seq_i) < l 13 | v(seq_i)=r; 14 | end 15 | seq_i=[]; 16 | end 17 | seq_i=[seq_i i(j)]; 18 | end 19 | 20 | % last seq 21 | if length(seq_i) < l; v(seq_i)=r; end 22 | 23 | return 24 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_sylbnd.m: -------------------------------------------------------------------------------- 1 | function sb = fu_sylbnd(s,sn,opt) 2 | 3 | %sb = fu_sylbnd(s,sn,opt) 4 | %called in fu_sylncl 5 | %s: signal vector 6 | %sn: vector with detected nucleus samples (by fu_sylncl_sub) 7 | %opt: as provided for fu_sylncl 8 | 9 | % window length for energy calculation in samples 10 | ml=floor(opt.length*opt.fs); 11 | % stepsize 12 | sts=max(1,floor(0.03*opt.fs)); 13 | 14 | sb=[]; 15 | for i=1:length(sn)-1; % for all adjacent syl ncl 16 | on=sn(i); 17 | off=sn(i+1); 18 | sw = s(on:off); 19 | ls = length(sw); 20 | all_i=[]; 21 | all_e=[]; 22 | for j=1:sts:length(sw) % for all windows within ncl pair 23 | yi=fu_i_window(j,ml,ls); 24 | y = sw(yi); 25 | e_y = fu_rmse(y); 26 | all_i=[all_i j]; 27 | all_e=[all_e e_y]; 28 | end 29 | [ymin ymini] = min(all_e); 30 | sb = [sb; on+all_i(ymini(1))]; 31 | end 32 | 33 | return 34 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_sylncl.m: -------------------------------------------------------------------------------- 1 | function [sn sb] = fu_sylncl(s,opt); 2 | 3 | % sn = fu_sylncl(s,opt); 4 | % [sn sb] = fu_sylncl(s,opt); 5 | % opt.do='apply': 6 | % default case. 7 | % returns vector sn of syllable nucleus samples in speech signal s 8 | % given opt structure with fields as specified in training output below. 9 | % Optionally, sb, a vector of syllable boundary samples is returned 10 | % (simply the sample minimum energy between two adjacent nuclei) 11 | % opt.do='train': 12 | % .ref: sample reference 13 | % .fs: sample frequency 14 | % .errtype: <'f'>|'io'|'mae' 15 | % error type: 1-fscore (best choice) 16 | % n_ins+n_omis (used in diss) 17 | % 1-MAE (after alignment) 18 | % returns structure SN to be used as OPT in 'apply' case 19 | % .f_thresh: energy threshold factor 20 | % .bf: lower and upper boundary frequencies for band pass filtering 21 | % .do: 'apply' 22 | % .fs: sample frequency of input signal 23 | % .e_min: minimum needed proportion of max energy 24 | % .length: length of energy window in s 25 | % .rlength: length of reference energy window (>length) in s 26 | % .md: min distance between subsequent nuclei in s (set to 0 if to be 27 | % neglected) 28 | % .nouse_int: <[]>; n x 2 matrix [on off] of intervals not to be used 29 | % (e.g. pause intervals). In samples! E.g. output of 30 | % fu_pause_detector (with opt.ret='smpl'). 
Additionally, 31 | % 0-output of fu_voicing (to be transformed for compatibility) can 32 | % be used. Both can also be called inline setting .do_nouse>0 33 | % .do_nouse: <0>|1|2|3: create or enlarge .nouse_int matrix by 34 | % finding pauses and/or voiceless utterance parts 35 | % <0> - do nothing 36 | % 1 - detect pauses and voiceless utterance parts 37 | % 2 - pause only 38 | % 3 - voiceless utterance parts only 39 | % .verbose: plot signal and nuclei 40 | % 41 | % -- exclude pause and voiceless intervals from analysis? 42 | % opt.pau.do=<'apply'>|'skip': preceding pause detection 43 | % .* see matlab_lib/fu_pause_detector.m 44 | % opt.voi.do=<'apply'>|'skip': preceding voicing detection 45 | % .*: see fu_voicing.m 46 | % 47 | % minimal application example: 48 | % [y fs] = wavread('myaudio.wav'); 49 | % opt.fs = fs; 50 | % opt.verbose = 1; 51 | % [sn sb] = fu_sylncl(y,opt); 52 | 53 | global s_glob; 54 | global opt_glob; 55 | close all 56 | 57 | if nargin==1; opt=struct; end 58 | opt=fu_optstruct_init(opt,{'do' 'nouse_int' 'do_nouse' 'errtype'},{'apply' [] 2 'f'}); 59 | ofld={'do' 'bf' 'f_thresh' 'length' 'rlength' 'md' 'e_min' 'fs' ... 60 | 'verbose' 'pau' 'unv'}; 61 | 62 | 63 | % preprocessing -> defining intervals not usable for syllable nuclei 64 | % matrix, rows: on- and offset in samples 65 | %opt.nouse_int = fu_sylncl_no_use_intervals(s,opt); 66 | opt.nouse_int = []; 67 | 68 | if strcmp(opt.do,'apply') %%%%%% apply %%%%%%%%% 69 | %fscore optimised on si1000p reference data 70 | odef={'apply' [212.5509 3967.1] 1.0681 0.0776 0.1491 0.1 0.1571 16000 0 struct struct}; 71 | opt=fu_optstruct_init(opt,ofld,odef); 72 | opt.pau = fu_optstruct_init(opt.pau, {'fs' 'ret'}, {opt.fs 'smpl'}); 73 | opt.unv = fu_optstruct_init(opt.unv, {'sts'}, {1}); 74 | sn=fu_sylncl_sub(s,opt); 75 | % add syl boundaries 76 | if nargout>1 77 | sb=fu_sylbnd(s,sn,opt); 78 | end 79 | else %%%%%% train %%%%%%%%% 80 | s_glob=s; 81 | opt_glob=opt; 82 | %o_opt=optimset(@fminunc); 83 | o_opt=optimset(@fminsearch); 84 | o_opt=optimset('LargeScale','on'); 85 | % [f_lowbnd/100 f_upbndf/1000 threshold_factor ncl_length ref_length 86 | % minimum_rms] 87 | w0=[2.3 2.9 1.06 0.08 0.14 0.16]; 88 | 89 | %[w fval ef o]=fminunc(@fu_sylncl_err,w0,o_opt); 90 | [w fval ef o]=fminsearch(@fu_sylncl_err,w0,o_opt); 91 | opt=fu_optstruct_init(opt,ofld,{'apply' [w(1)*100 w(2)*1000] ... 92 | w(3) w(4) w(5) w(6) opt.fs 1}); 93 | sn=fu_sylncl_sub(s,opt); 94 | end 95 | 96 | if opt.verbose==1 97 | %[sn [sb; NaN]] 98 | %t=[1:length(s)]./opt.fs; 99 | t=[1:length(s)]; 100 | plot(t,s); 101 | hold on 102 | %if isfield(opt,'ref') 103 | % for i=opt.ref; plot([i i],[-1 1],'-g'); end 104 | %end 105 | for i=sn; plot([i i],[-1 1],'-r'); end 106 | if nargout>1 107 | for i=sb; plot([i i],[-1 1],'-g'); end 108 | end 109 | end 110 | 111 | if strcmp(opt.do,'train') 112 | opt.do='apply'; 113 | opt.error=fval; 114 | sn=opt; 115 | sn_opt=opt; 116 | save('sn_opt','sn_opt'); 117 | end 118 | 119 | return 120 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_sylncl_sub.m: -------------------------------------------------------------------------------- 1 | function t=fu_sylncl_sub(s,opt); 2 | 3 | % returns samples of syllable nuclei given signal S and processing 4 | % options OPT (see fu_sylncl for details) 5 | % called by fu_sylncl 6 | 7 | % recall higher before 2nd nucl splitting. why???
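% A 'train'-mode counterpart to the minimal 'apply' example in fu_sylncl
% above (a sketch only; assumes a hand-labelled vector ref of reference
% nucleus samples, as described under opt.do='train'):
%   opt.do = 'train'; opt.fs = fs; opt.ref = ref;
%   trainedOpt = fu_sylncl(y, opt); % returns an opt struct reusable for 'apply'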
8 | 9 | %% settings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 10 | % reference window span 11 | rws = floor(opt.rlength*opt.fs); 12 | % signal length 13 | ls=length(s); 14 | % window length for energy calculation in samples 15 | ml=floor(opt.length*opt.fs); 16 | % minimum distance between subsequent nuclei in samples 17 | md=floor(opt.md*opt.fs); 18 | % stepsize 19 | sts=max(1,floor(0.03*opt.fs)); 20 | stsh=floor(sts/2); % for centering of reference window 21 | 22 | %% no use intervals (pause, voiceless) %%%%%%%%%%%%%%%%%%%% 23 | % -> vector of all samples not to be used 24 | t_nou_init = []; 25 | t_nou_pau=[]; 26 | voi=[]; 27 | t_nou=[]; 28 | if isfield(opt,'nouse_int') 29 | t_nou_init = opt.nouse_int; 30 | end 31 | if opt.do_nouse>0 32 | if opt.do_nouse < 3 33 | t_nou_pau = fu_pause_detector(s,opt.pau); 34 | end 35 | if (opt.do_nouse==1 | opt.do_nouse==3) 36 | [voi zrr] = fu_voicing(s,opt.fs,opt.unv); 37 | end 38 | end 39 | for i=1:size(t_nou_init,1) 40 | t_nou=[t_nou t_nou_init(i,1):t_nou_init(i,2)]; 41 | end 42 | for i=1:size(t_nou_pau,1) 43 | t_nou=[t_nou t_nou_pau(i,1):t_nou_pau(i,2)]; 44 | end 45 | t_nou=unique([t_nou find(voi==0)']); 46 | 47 | 48 | %%%% filtering %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 49 | if length(opt.bf)==1; ft='low'; 50 | else; ft='band'; end 51 | 52 | ord=5; % filter order, the higher the steeper, but incapable to filter 53 | % narrow bands 54 | s=fu_filter(s,ft,opt.bf,opt.fs,ord); 55 | 56 | %%%% settings 2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 57 | % minimum energy as portion of maximum energy found 58 | e_y=[]; 59 | for i=1:sts:ls 60 | %%%% window %%%%%%%%%%%%%%%%%%%%%%%%%%%%% 61 | yi=i:min(ls,i+ml-1); 62 | y=s(yi); 63 | e_y = [e_y fu_rmse(y)]; 64 | end 65 | 66 | e_min=opt.e_min*max(e_y); 67 | mey=max(e_y); 68 | 69 | 70 | % output vector collecting nucleus sample indices 71 | t=[]; 72 | 73 | all_i=[]; 74 | all_e=[]; 75 | all_r=[]; 76 | 77 | 78 | for i=1:sts:ls 79 | yi=fu_i_window(i,ml,ls); 80 | y=s(yi); 81 | e_y = fu_rmse(y); 82 | rwi = fu_i_window(i,rws,ls); 83 | rw = s(rwi); 84 | e_rw=fu_rmse(rw); 85 | all_i=[all_i i]; 86 | all_e=[all_e e_y]; 87 | all_r=[all_r e_rw]; 88 | end 89 | 90 | lmopt=struct; 91 | 92 | lmopt.peak.mpd = floor(opt.fs*opt.md/sts); 93 | [pks idx] = fu_locmax(all_e,lmopt); 94 | t=[]; 95 | for i=idx 96 | if (all_e(i) >= all_r(i)*opt.f_thresh & all_e(i) > e_min) 97 | if length(find(t_nou==all_i(i)))==0 98 | t=[t; all_i(i)]; 99 | end 100 | end 101 | end 102 | 103 | 104 | 105 | return 106 | 107 | 108 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_transp.m: -------------------------------------------------------------------------------- 1 | function [xt done] = fu_transp(x,do) 2 | 3 | % xt = fu_transp(x,do) 4 | % [xt done] = fu_transp(x,do) 5 | % x: vector 6 | % do: <'t'>|'r'|'c'|'i' - transpose, make row, make column, ignore 7 | % 'r' and 'c' just make sense for vectors!! 8 | % input 1|0 is mapped to 't'|'i' for backward compatibility 9 | % xt: x +/- transposed 10 | % done: 't' if transformation was carried out, else 'i' (for consistent 11 | % reapplication of fu_transpose_vec) 12 | % Of use e.g. 
if a function would need a column vector as input without 13 | % bothering the user and returning a vector in the same format as the input 14 | % See example in fu_smooth_binvec.m 15 | 16 | if nargin<2; do='t'; end 17 | if isnumeric(do) 18 | if do==1; do='t'; 19 | else do='i'; 20 | end 21 | end 22 | 23 | dun='i'; 24 | xt=x; 25 | 26 | if strcmp(do,'t') 27 | xt=x'; 28 | dun=do; 29 | elseif ~strcmp(do,'i') 30 | s=size(x); 31 | if min(s) > 1 32 | disp('Transposition just applicable for vectors. Done nothing.'); 33 | else 34 | if ((strcmp(do,'r') && s(2)==1) || (strcmp(do,'c') && s(1)==1)) 35 | xt=x'; 36 | dun='t'; 37 | end 38 | end 39 | end 40 | 41 | if nargout > 1; done=dun; end 42 | 43 | return 44 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_trim_vec.m: -------------------------------------------------------------------------------- 1 | function vt = fu_trim_vec(v,w,a); 2 | 3 | % vt=fu_trim_vec(v,w,a); 4 | % pops vector V or pushes scalar/vector A to V until size of V is equal to 5 | % size of W 6 | 7 | vt=column2rowvec(v); 8 | while length(vt)<length(w); vt=[vt a]; end 9 | vt=vt(1:length(w)); 10 | 11 | return 12 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_voicing.m: -------------------------------------------------------------------------------- 1 | function [voi zrr] = fu_voicing(y,sr,opt); 2 | 3 | % voi = fu_voicing(y,sr <,opt>); 4 | % [voi zr] = fu_voicing(y,sr <,opt>); 5 | % Y: signal 6 | % SR: sample rate 7 | % VOI: vector with 1 element per window 8 | % 1: voiced 9 | % 0: voiceless/pause 10 | % ZR: do=='apply': vector of zero crossing rates, one value per window 11 | % 'train': opt struct with optimised .th and .zr_th and .err error 12 | % OPT: 13 | % .do: <'apply'>|train 14 | % .wl: window length <0.03> (<1: in s, >=1: in samples) 15 | % .th: <0.002> relative amplitude threshold; values of y below .th*max(abs(y)) are set to NaN 16 | % .sts: step size <0.01> (<1: in s, >=1: in samples) 17 | % .zr_th: <2000> (below & >0: voiced; use higher value for increased 18 | % recall, lower value for increased precision) 19 | % .min_nf: <3> (min number of frames in a row to be constantly 20 | % (un)voiced). Interpolation over shorter sequences 21 | % .ret: <'w'>|'smpl' 22 | % 'w': one value per window 23 | % 'smpl': one value per signal sample 24 | % IF .do equal 'train' 25 | % .errfun <@fu_voicing_err> 26 | % .ref: reference matrix or vector (see e.g. voi_ref.dat) 27 | % --> optimisation of .th and .zr_th 28 | % integrated training call by FU_VOI_OPTIM_BRACKET 29 | % 30 | % voicing detection by zero crossing rate 31 | % BEWARE: Default parameters are optimised on si1000p reference and 32 | % sts=0.01. If step size is changed, then $sts in sncl_ref.pl has to 33 | % be changed the same way!!! 34 | % param values are informally optimised on SI1000P reference data: 35 | % hamming: 0.1180 36 | % precision: 0.8898 37 | % recall: 0.9045 38 | 39 | if nargin < 3; opt=struct; end 40 | opt = fu_optstruct_init(opt,{'wl' 'th' 'sts' 'zr_th' 'do' 'min_nf' 'ret'},...
41 | {0.03 0.002 0.01 2000 'apply' 3 'w'}); 42 | opt.sr = sr; 43 | 44 | 45 | if strcmp(opt.do,'apply') %%%% application 46 | [voi zr] = fu_voicing_sub(y,opt); 47 | if nargout==2; zrr=zr; end 48 | else %%%%%%%%%%%%%%%%%%%%%%%% training 49 | %o_opt=optimset(@fminunc); 50 | o_opt=optimset(@fminsearch); 51 | o_opt=optimset('LargeScale','on'); 52 | w0=[0.004 1000]; 53 | %[w fval ef o]=fminunc(opt.errfun,w0,o_opt); 54 | [w fval ef o]=fminsearch(opt.errfun,w0,o_opt); 55 | opt.th=w(1); 56 | opt.zr_th=w(2); 57 | [voiv zr] = fu_voicing_sub(y,opt); 58 | % error 59 | voiv=fu_trim_vec(voiv,opt.ref,0); 60 | e = pdist([voiv;opt.ref],'hamming'); 61 | voi=opt; 62 | voi.err=e; 63 | if nargout==2; zrr=e; end % to avoid crash 64 | end 65 | 66 | return 67 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_voicing_sub.m: -------------------------------------------------------------------------------- 1 | function [voi zrr] = fu_voicing_sub(y,opt); 2 | 3 | % returns binary vector (1=voiced frame) for signal vector Y 4 | % and specs given in OPT 5 | % called by fu_voicing 6 | 7 | zr = fu_zero_crossing_rate(y,opt.sr,opt); 8 | voi=zeros(length(zr),1); 9 | voi(find(zr<opt.zr_th & zr>0))=1; 10 | 11 | if opt.min_nf>1 12 | voi=fu_smooth_binvec(voi,opt.min_nf); 13 | end 14 | 15 | 16 | if nargout==2; zrr=zr; end 17 | 18 | return 19 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_window_bnd.m: -------------------------------------------------------------------------------- 1 | function wb = fu_window_bnd(wl,ly,opt); 2 | 3 | %wb = fu_window_bnd(wl,ly,opt); 4 | %returns matrix of window on- and offset indices (one pair per row) 5 | %windows are centered on each index 1:opt.sts:ly 6 | % wl: window length 7 | % ly: length of vector 8 | % opt: 9 | % .sts: int <1> - step size 10 | % .idx: <0>|1 - if 1, not just the bounds but all indices in between are returned 11 | % e.g.
wl=2; ly=6; opt.sts=1; opt.idx=0; 12 | % --> wb = [1 2; 1 3; 2 4; 3 5; 4 6; 5 6] 13 | % opt.idx=1; 14 | % --> wb = [1 1 2; 1 2 3; 2 3 4; 3 4 5; 4 5 6; 5 6 6] 15 | %usable for vectorisation of algorithms 16 | 17 | if nargin<3; opt=struct; end 18 | opt = fu_optstruct_init(opt,{'sts' 'idx'},{1 0}); 19 | 20 | x=[1:opt.sts:ly]'; 21 | h=round(wl/2); 22 | 23 | if opt.idx==0 24 | hh = [-h h]; 25 | else 26 | hh= -h:h; 27 | end 28 | 29 | wb=repmat(hh,size(x,1),1)+repmat(x,1,size(hh,2)); 30 | wb(find(wb<1))=1; 31 | wb(find(wb>ly))=ly; 32 | 33 | return 34 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_window_vec.m: -------------------------------------------------------------------------------- 1 | function m = fu_window_vec(v,opt); 2 | 3 | % m = fu_window_vec(v,opt); 4 | % windows vector V according to specs in struct opt 5 | % V: input vector 6 | % M: matrix, one window per row 7 | % OPT: 8 | % .sts: <1> int, step size 9 | % .wl: <1> int, window length 10 | 11 | % opt init 12 | % idx is needed for fu_window_bnd, not to be specified by user 13 | %usable for vectorisation of algorithms 14 | 15 | if nargin<2; opt=struct; end 16 | opt = fu_optstruct_init(opt,{'sts' 'wl'},{1 1}); 17 | opt.idx=1; 18 | wb = fu_window_bnd(opt.wl,length(v),opt); 19 | 20 | m=v(wb); 21 | 22 | return 23 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/fu_zero_crossing_rate.m: -------------------------------------------------------------------------------- 1 | function zr = fu_zero_crossing_rate(y,sr,opt); 2 | 3 | %zr = fu_zero_crossing_rate(y,sr [,opt]); 4 | %y: signal vector 5 | %sr: sample rate (<16000>) 6 | %opt 7 | % .wl: <0.01> window length (<1: in s, >=1: in samples) 8 | % .th: <0.004> min abs extreme point amplitude (vs. noise in pauses) 9 | % used as a factor: .th * max(abs(y)) ! 10 | % .sts: step size <1> (<1: in s, >=1: in samples) 11 | %zr: zero crossing rate in crossings/sec (same length as Y) 12 | % set all data points below .th to NaN 13 | % 14 | % center window of length .wl on each data point in Y 15 | % 16 | 17 | if nargin < 2; sr=16000; end 18 | if nargin < 3; opt=struct; end 19 | opt=fu_optstruct_init(opt,{'wl' 'th' 'sts'},{0.01 0.004 1}); 20 | 21 | % sec -> samples 22 | if opt.wl < 1; opt.wl = round(opt.wl*sr); end 23 | if opt.sts < 1; opt.sts = round(opt.sts*sr); end 24 | 25 | % filter values below threshold 26 | ya=abs(y); 27 | y(find(ya<opt.th*max(ya)))=NaN; 28 | 29 | % y(i)*y(i+1)<0 -> zero crossing 30 | zcv = [NaN; row2columnvec(y(1:end-1).*y(2:end))]; 31 | 32 | % -> matrix, one row per window 33 | zcm = fu_window_vec(zcv,opt); 34 | 35 | % zero crossings 36 | [ri ci] = find(zcm<=0); 37 | 38 | % how many zero crossings per window?
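% (fu_typecount, shipped alongside this file, evidently tallies how often
% each row index occurs in ri and returns [index count] pairs; that is why
% zcw(:,1) serves as indices and zcw(:,2) as per-window counts below)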
39 | zcw = fu_typecount(ri); 40 | 41 | % getting rate 42 | l=size(zcm,2); 43 | 44 | zr=zeros(size(zcm,1),1); 45 | zr(zcw(:,1)) = zcw(:,2) / l * sr; 46 | 47 | return 48 | -------------------------------------------------------------------------------- /matlabScripts/nucleus_detection_matlab/row2columnvec.m: -------------------------------------------------------------------------------- 1 | function v=row2columnvec(v) 2 | 3 | if length(v)==0; return; end 4 | 5 | if length(v(:,1))==1 6 | v=v'; 7 | end 8 | 9 | return 10 | -------------------------------------------------------------------------------- /praatScripts/get_pitch_and_intensity.praat: -------------------------------------------------------------------------------- 1 | # Based on http://www.fon.hum.uva.nl/praat/manual/Script_for_listing_time_--F0_--intensity.html 2 | # 3 | 4 | 5 | # Pitch and intensity parameters 6 | # male: 50, 350 7 | # female: 75, 450 8 | sampleStep = 0.01 9 | minPitch = 75 10 | maxPitch = 450 11 | 12 | 13 | # Directory needs a final '/' 14 | # **Both directories need to already exist** 15 | input_directory$ = "/Users/tmahrt/Desktop/experiments/LMEDS_studies/RPT_English/features_test/wav/female/" 16 | output_directory$ = "/Users/tmahrt/Desktop/experiments/LMEDS_studies/RPT_English/features_test/pitch_and_intensity_listings/" 17 | 18 | strings = Create Strings as file list... list 'input_directory$'*.wav 19 | numberOfFiles = Get number of strings 20 | for ifile to numberOfFiles 21 | selectObject: strings 22 | fileName$ = Get string: ifile 23 | Read from file: input_directory$ + fileName$ 24 | name$ = fileName$ - ".wav" 25 | 26 | sound = selected ("Sound") 27 | selectObject: sound 28 | tmin = Get start time 29 | tmax = Get end time 30 | 31 | To Pitch: sampleStep, minPitch, maxPitch 32 | Rename: "pitch" 33 | 34 | selectObject: sound 35 | To Intensity: minPitch, sampleStep 36 | Rename: "intensity" 37 | 38 | for i to (tmax-tmin)/sampleStep 39 | time = tmin + i * sampleStep 40 | selectObject: "Pitch pitch" 41 | pitch = Get value at time: time, "Hertz", "Linear" 42 | selectObject: "Intensity intensity" 43 | intensity = Get value at time: time, "Cubic" 44 | appendFileLine: "'output_directory$''name$'.txt", fixed$ (time, 2), ",", fixed$ (pitch, 3), ",", fixed$ (intensity, 3) 45 | endfor 46 | 47 | 48 | # Cleanup 49 | 50 | selectObject: "Pitch pitch" 51 | Remove 52 | 53 | selectObject: "Intensity intensity" 54 | Remove 55 | 56 | selectObject: sound 57 | Remove 58 | 59 | endfor 60 | 61 | selectObject: strings 62 | Remove 63 | 64 | -------------------------------------------------------------------------------- /praatScripts/psolaPitch.praat: -------------------------------------------------------------------------------- 1 | numSteps = %(num_steps)s 2 | 3 | Read from file... %(input_dir)s/%(input_name)s.wav 4 | 5 | for iStep to numSteps - 1 6 | zeroedI = iStep 7 | 8 | Read from file... %(pitch_dir)s/%(input_name)s_'zeroedI'.PitchTier 9 | select Sound %(input_name)s 10 | To Manipulation... 0.01 %(pitch_lower_bound)d %(pitch_upper_bound)d 11 | 12 | select PitchTier %(input_name)s_'zeroedI' 13 | plus Manipulation %(input_name)s 14 | Replace pitch tier 15 | 16 | select Manipulation %(input_name)s 17 | Get resynthesis (overlap-add) 18 | Save as WAV file... 
%(output_dir)s/%(output_name)s_'zeroedI'.wav 19 | Remove 20 | 21 | select Manipulation %(input_name)s 22 | Remove 23 | select PitchTier %(input_name)s_'zeroedI' 24 | Remove 25 | endfor 26 | 27 | select Sound %(input_name)s 28 | Remove -------------------------------------------------------------------------------- /pyacoustics/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 27, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | -------------------------------------------------------------------------------- /pyacoustics/aggregate_features.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 20, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | import io 10 | 11 | from pyacoustics.utilities import utils 12 | 13 | 14 | def aggregateFeatures(featurePath, featureList, headerStr=None): 15 | 16 | outputDir = join(featurePath, "aggr") 17 | utils.makeDir(outputDir) 18 | 19 | fnList = [] 20 | dataList = [] 21 | 22 | # Find the files that exist in all features 23 | for feature in featureList: 24 | fnSubList = utils.findFiles(join(featurePath, feature), filterExt=".txt") 25 | fnList.append(fnSubList) 26 | 27 | actualFNList = [] 28 | for featureFN in fnList[0]: 29 | if all([featureFN in subList for subList in fnList]): 30 | actualFNList.append(featureFN) 31 | 32 | for featureFN in actualFNList: 33 | dataList = [] 34 | for feature in featureList: 35 | featureDataList = utils.openCSV( 36 | join(featurePath, feature), featureFN, encoding="utf-8" 37 | ) 38 | dataList.append([",".join(row) for row in featureDataList]) 39 | 40 | name = os.path.splitext(featureFN)[0] 41 | 42 | dataList.insert(0, [name for _ in range(len(dataList[0]))]) 43 | tDataList = utils.safeZip(dataList, enforceLength=True) 44 | outputList = [",".join(row) for row in tDataList] 45 | outputTxt = "\n".join(outputList) 46 | 47 | outputFN = join(outputDir, name + ".csv") 48 | with io.open(outputFN, "w", encoding="utf-8") as fd: 49 | fd.write(outputTxt) 50 | 51 | # Cat all files together 52 | aggrOutput = [] 53 | 54 | if headerStr is not None: 55 | aggrOutput.append(headerStr) 56 | 57 | for fn in utils.findFiles(outputDir, filterExt=".csv"): 58 | if fn == "all.csv": 59 | continue 60 | with io.open(join(outputDir, fn), "r", encoding="utf-8") as fd: 61 | aggrOutput.append(fd.read()) 62 | 63 | with io.open(join(outputDir, "all.csv"), "w", encoding="utf-8") as fd: 64 | fd.write("\n".join(aggrOutput)) 65 | -------------------------------------------------------------------------------- /pyacoustics/intensity_and_pitch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/intensity_and_pitch/__init__.py -------------------------------------------------------------------------------- /pyacoustics/intensity_and_pitch/get_f0.py: -------------------------------------------------------------------------------- 1 | """ 2 | A Python implementation of ESPS's getF0 function 3 | 4 | The implementation is part of tkSnack. As I recall, it is a bit 5 | cumbersome to install, although there are python distributions, 6 | like ActiveState, which come with it preinstalled. 
For more information, 7 | visit the snack website: 8 | http://www.speech.kth.se/snack/ 9 | """ 10 | import os 11 | from os.path import join 12 | 13 | import Tkinter 14 | 15 | root = Tkinter.Tk() 16 | import tkSnack 17 | 18 | tkSnack.initializeSnack(root) 19 | 20 | 21 | from pyacoustics.utilities import utils 22 | 23 | MALE = "male" 24 | FEMALE = "female" 25 | 26 | SAMPLE_FREQ = 100 27 | 28 | 29 | def extractPitch(fnFullPath, minPitch, maxPitch): 30 | """ 31 | 32 | Former default pitch values: male (50, 350); female (75, 450) 33 | """ 34 | 35 | soundObj = tkSnack.Sound(load=fnFullPath) 36 | 37 | output = soundObj.pitch(method="ESPS", minpitch=minPitch, maxpitch=maxPitch) 38 | 39 | pitchList = [] 40 | for value in output: 41 | value = value[0] 42 | 43 | if value == 0: 44 | value = int(value) 45 | pitchList.append(value) 46 | 47 | return pitchList, SAMPLE_FREQ 48 | 49 | 50 | def getPitchAtTime(pitchList, startTime, endTime): 51 | startIndex = int(startTime * SAMPLE_FREQ) 52 | endIndex = int(endTime * SAMPLE_FREQ) 53 | 54 | return pitchList[startIndex:endIndex] 55 | 56 | 57 | if __name__ == "__main__": 58 | path = "/Users/tmahrt/Desktop/fire_new_audio_test" 59 | for name in utils.findFiles(path, filterExt=".wav", stripExt=True): 60 | tmpPitchList, _ = extractPitch(join(path, name + ".wav"), 75, 450) 61 | tmpPitchList = [str(val) for val in tmpPitchList] 62 | 63 | with open(join(path, name + "_f0.csv"), "w") as fd: 64 | fd.write("\n".join(tmpPitchList)) 65 | -------------------------------------------------------------------------------- /pyacoustics/morph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/morph/__init__.py -------------------------------------------------------------------------------- /pyacoustics/morph/intensity_morph.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Apr 2, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | 10 | import math 11 | import copy 12 | 13 | from pyacoustics.morph.morph_utils import common 14 | from pyacoustics.morph.morph_utils import plot_morphed_data 15 | from pyacoustics.utilities import utils 16 | from pyacoustics.utilities import sequences 17 | from pyacoustics.signals import audio_scripts 18 | from pyacoustics.utilities import my_math 19 | 20 | 21 | def intensityMorph( 22 | fromWavFN, 23 | toWavFN, 24 | fromWavTGFN, 25 | toWavTGFN, 26 | tierName, 27 | numSteps, 28 | coreChunkSize, 29 | plotFlag, 30 | ): 31 | fromDataTupleList = common.getIntervals(fromWavTGFN, tierName) 32 | toDataTupleList = common.getIntervals(toWavTGFN, tierName) 33 | 34 | outputName = os.path.splitext(fromWavFN)[0] + "_int_" + tierName 35 | 36 | _intensityMorph( 37 | fromWavFN, 38 | toWavFN, 39 | fromDataTupleList, 40 | toDataTupleList, 41 | numSteps, 42 | coreChunkSize, 43 | plotFlag, 44 | outputName, 45 | ) 46 | 47 | 48 | def _intensityMorph( 49 | fromWavFN, 50 | toWavFN, 51 | fromDataTupleList, 52 | toDataTupleList, 53 | numSteps, 54 | coreChunkSize, 55 | plotFlag, 56 | outputName=None, 57 | ): 58 | if outputName is None: 59 | outputName = os.path.splitext(fromWavFN)[0] + "_int" 60 | 61 | outputDir = join(os.path.split(fromWavFN)[0], "output") 62 | utils.makeDir(outputDir) 63 | 64 | # Determine the multiplication values to be used in normalization 65 | # - this extracts one value per chunk 66 | expectedLength = 0 67 |
normFactorList = [] 68 | truncatedToList = [] 69 | chunkSizeList = [] 70 | fromDataList = [] 71 | 72 | fromParams = audio_scripts.getParams(fromWavFN) 73 | toParams = audio_scripts.getParams(toWavFN) 74 | 75 | for fromTuple, toTuple in zip(fromDataTupleList, toDataTupleList): 76 | fromStart, fromEnd = fromTuple[:2] 77 | toStart, toEnd = toTuple[:2] 78 | 79 | expectedLength += (fromEnd - fromStart) * fromParams[2] 80 | 81 | fromDataList.extend(fromSubWav.rawDataList) 82 | 83 | normFactorListTmp, a = getRelativeNormalizedFactors( 84 | fromSubWav, toSubWav, coreChunkSize 85 | ) 86 | tmpChunkList = [tmpChunkSize for value, tmpChunkSize in normFactorListTmp] 87 | chunkSizeList.append(sum(tmpChunkList)) 88 | normFactorList.extend(normFactorListTmp) 89 | truncatedToList.extend(a) 90 | 91 | interpolatedResults = [] 92 | normFactorGen = [ 93 | sequences.interp(1.0, factor[0], numSteps) for factor in normFactorList 94 | ] 95 | tmpChunkSizeList = [factor[1] for factor in normFactorList] 96 | for i in xrange(numSteps): 97 | outputFN = "%s_s%d_%d_%d.wav" % (outputName, coreChunkSize, numSteps - 1, i) 98 | 99 | tmpNormFactorList = [next(normFactorGen[j]) for j in xrange(len(normFactorGen))] 100 | 101 | # Skip the first value (same as the input value) 102 | if i == 0: 103 | continue 104 | 105 | tmpInputList = zip(tmpNormFactorList, tmpChunkSizeList) 106 | 107 | normalizationTuple = expandNormalizationFactors(tmpInputList) 108 | expandedNormFactorList = normalizationTuple[0] 109 | 110 | # It happened once that the expanded factor list was off by one value 111 | # -- I could not determine why, so this is just a cheap hack 112 | if len(expandedNormFactorList) == (expectedLength - 1): 113 | expandedNormFactorList.append(expandedNormFactorList[-1]) 114 | 115 | # print("Diff: ", expectedLength, len(expandedNormFactorList)) 116 | assert expectedLength == len(expandedNormFactorList) 117 | 118 | newWavObj = copy.deepcopy(fromWavObj) 119 | newRawDataList = [] 120 | 121 | # Apply the normalization and reinsert the data back 122 | # into the original file 123 | offset = 0 124 | for fromTuple, chunkSize in zip(fromDataTupleList, chunkSizeList): 125 | fromStart, fromEnd = fromTuple[:2] 126 | fromSubWav = fromWavObj.extractSubsegment(fromStart, fromEnd) 127 | assert len(fromSubWav.rawDataList) == len( 128 | expandedNormFactorList[offset : offset + chunkSize] 129 | ) 130 | 131 | tmpList = [ 132 | fromSubWav.rawDataList, 133 | expandedNormFactorList[offset : offset + chunkSize], 134 | ] 135 | subRawDataList = [ 136 | value * normFactor 137 | for value, normFactor in utils.safeZip(tmpList, enforceLength=True) 138 | ] 139 | newRawDataList.extend(subRawDataList) 140 | 141 | offset += chunkSize 142 | 143 | newWavObj = audio.WavObj(newRawDataList, fromWavObj.samplingRate) 144 | newWavObj.save(join(outputDir, outputFN)) 145 | 146 | interpolatedResults.append(newWavObj.rawDataList) 147 | 148 | plotFN = "%s_s%d_%d.png" % (outputFN, coreChunkSize, numSteps) 149 | 150 | if plotFlag: 151 | plotMorphedData.plotIntensity( 152 | fromDataList, 153 | truncatedToList, 154 | interpolatedResults, 155 | expandedNormFactorList, 156 | os.path.join(outputDir, plotFN), 157 | ) 158 | 159 | 160 | def getNormalizationFactor(lst, refLst=None): 161 | """""" 162 | 163 | # Get the source values that we will be normalizing 164 | lst = list(set(lst)) 165 | if 0 in lst: 166 | lst.pop(lst.index(0)) 167 | 168 | actMaxV = float(max(lst)) 169 | actMinV = float(min(lst)) 170 | 171 | # Get the reference values 172 | if refLst is None: 173 | refMaxV = 32767.0 
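# (32767/-32767 are the full-scale bounds of the 16-bit PCM samples this
# package reads elsewhere via struct.unpack with the "h" format)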
174 | refMinV = -32767.0 175 | else: 176 | refLst = list(set(refLst)) 177 | if 0 in refLst: 178 | refLst.pop(refLst.index(0)) 179 | 180 | refMaxV = float(max(refLst)) 181 | refMinV = float(min(refLst)) 182 | 183 | actualFactor = min(refMaxV / actMaxV, abs(refMinV) / abs(actMinV)) 184 | # print("Normalization factor: ", actualFactor) 185 | 186 | return actualFactor 187 | 188 | 189 | def getRelativeNormalizedFactors(fromDataList, toDataList, chunkSize): 190 | """ 191 | Determines the factors to be used to normalize sourceWav from targetWav 192 | 193 | This can be used to relatively normalize the source based on the target 194 | on an iterative basis (small chunks are normalized rather than the entire 195 | wav. 196 | """ 197 | 198 | # Sample proportionately from the targetWav 199 | # - if the two lists are the same length, there is no change 200 | # - if /target/ is shorter, it will be lengthened with some repeated values 201 | # - if /target/ is longer, it will be shortened with some values dropped 202 | tmpIndexList = sequences.interp(0, len(toDataList) - 1, fromDataList) 203 | newTargetRawDataList = [toDataList[int(round(i))] for i in tmpIndexList] 204 | 205 | assert len(fromDataList) == len(newTargetRawDataList) 206 | 207 | fromGen = sequences.subsequenceGenerator( 208 | fromDataList, chunkSize, sequences.sampleMiddle, sequences.DO_SAMPLE_GATED 209 | ) 210 | toGen = sequences.subsequenceGenerator( 211 | newTargetRawDataList, 212 | chunkSize, 213 | sequences.sampleMiddle, 214 | sequences.DO_SAMPLE_GATED, 215 | ) 216 | 217 | normFactorList = [] 218 | i = 0 219 | for fromTuple, toTuple in zip(fromGen, toGen): 220 | fromDataChunk = fromTuple[0] 221 | toDataChunk = toTuple[0] 222 | distToNextControlPoint = fromTuple[2] 223 | normFactor = getNormalizationFactor(fromDataChunk, toDataChunk) 224 | normFactorList.append((normFactor, distToNextControlPoint)) 225 | # i += 1 226 | # if i >= 38: 227 | # print("hello") 228 | 229 | # print(len(sourceWav.rawDataList), allChunks) 230 | # assert(len(sourceWav.rawDataList) == allChunks) 231 | return normFactorList, newTargetRawDataList 232 | 233 | 234 | def expandNormalizationFactors(normFactorList): 235 | """ 236 | Expands the normFactorList from being chunk-based to sample-based 237 | 238 | E.g. A wav with 1000 samples may be represented by a factorList of 5 chunks 239 | (5 factor values). This function will expand that to 1000. 
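A rough worked example (assuming my_math.linspace is numpy-like and
endpoint-inclusive): normFactorList = [(2.0, 3), (4.0, 2)] expands to
linspace(2.0, 4.0, 3) for the first chunk followed by the repeated final
factor linspace(4.0, 4.0, 2), i.e. approximately [2.0, 3.0, 4.0, 4.0, 4.0].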
240 | """ 241 | 242 | i = 0 243 | normFactorsFull = [] 244 | controlPoints = [] 245 | while i < len(normFactorList) - 1: 246 | startVal, chunkSize = normFactorList[i] 247 | endVal = normFactorList[i + 1][0] 248 | normFactorsFull.extend(my_math.linspace(startVal, endVal, chunkSize)) 249 | 250 | controlPoints.append(startVal) 251 | controlPoints.extend(my_math.linspace(startVal, startVal, chunkSize - 1)) 252 | i += 1 253 | 254 | # We have no more data, so just repeat the final norm factor at the tail 255 | # of the file 256 | value, finalChunkSize = normFactorList[i] 257 | controlPoints.append(value) 258 | controlPoints.extend(my_math.linspace(startVal, startVal, finalChunkSize - 1)) 259 | normFactorsFull.extend(my_math.linspace(value, value, finalChunkSize)) 260 | 261 | print("Norm factors full: %d" % len(normFactorsFull)) 262 | return normFactorsFull, controlPoints 263 | -------------------------------------------------------------------------------- /pyacoustics/signals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/signals/__init__.py -------------------------------------------------------------------------------- /pyacoustics/signals/audio_scripts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Aug 23, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | 10 | import struct 11 | import wave 12 | import audioop 13 | 14 | from pyacoustics.utilities import utils 15 | 16 | 17 | def loadWavFile(wavFN): 18 | sampWidthDict = {1: "b", 2: "h", 4: "i", 8: "q"} 19 | audiofile = wave.open(wavFN, "r") 20 | 21 | params = audiofile.getparams() 22 | sampwidth = params[1] 23 | nframes = params[3] 24 | 25 | byteCode = sampWidthDict[sampwidth] 26 | waveData = audiofile.readframes(nframes) 27 | audioFrameList = struct.unpack("<" + byteCode * nframes, waveData) 28 | 29 | return audioFrameList, params 30 | 31 | 32 | def resampleAudio(soxEXE, newSampleRate, inputPath, fn, outputPath=None): 33 | """ 34 | 35 | Mac: "/opt/local/bin/sox" 36 | Windows: "C:\Program Files (x86)\sox-14-4-2\sox.exe" 37 | """ 38 | if outputPath is None: 39 | outputPath = join(inputPath, "resampled_wavs") 40 | utils.makeDir(outputPath) 41 | 42 | soxCmd = "%s %s -r %f %s rate -v 96k" % ( 43 | soxEXE, 44 | join(inputPath, fn), 45 | newSampleRate, 46 | join(outputPath, fn), 47 | ) 48 | os.system(soxCmd) 49 | 50 | 51 | def getSerializedFileDuration(fn): 52 | name = os.path.splitext(fn)[0] 53 | durationFN = name + "_duration.txt" 54 | if not os.path.exists(durationFN): 55 | duration = getSoundFileDuration(fn) 56 | try: 57 | with open(durationFN, "w") as fd: 58 | fd.write(str(duration)) 59 | except IOError: 60 | # If we don't have write permissions, there isn't anything we can 61 | # do, the user should still be able to get their data 62 | pass 63 | else: 64 | with open(durationFN, "r") as fd: 65 | duration = float(fd.read()) 66 | 67 | return duration 68 | 69 | 70 | def getSoundFileDuration(fn): 71 | """ 72 | Returns the duration of a wav file (in seconds) 73 | """ 74 | audiofile = wave.open(fn, "r") 75 | 76 | params = audiofile.getparams() 77 | framerate = params[2] 78 | nframes = params[3] 79 | 80 | duration = float(nframes) / framerate 81 | return duration 82 | 83 | 84 | def getParams(fn): 85 | audiofile = wave.open(fn, "r") 86 | 87 | params = audiofile.getparams() 88 | 89 | return params 90 | 
91 | 92 | def reduceToSingleChannel(fn, outputFN, leftFactor=1, rightFactor=0): 93 | audiofile = wave.open(fn, "r") 94 | 95 | params = audiofile.getparams() 96 | sampwidth = params[1] 97 | nframes = params[3] 98 | audioFrames = audiofile.readframes(nframes) 99 | 100 | monoAudioFrames = audioop.tomono(audioFrames, sampwidth, leftFactor, rightFactor) 101 | params = tuple( 102 | [ 103 | 1, 104 | ] 105 | + list(params[1:]) 106 | ) 107 | 108 | outputAudiofile = wave.open(outputFN, "w") 109 | outputAudiofile.setparams(params) 110 | outputAudiofile.writeframes(monoAudioFrames) 111 | 112 | 113 | def modifySampleWidth(fn, outputFN, newSampleWidth): 114 | sampWidthDict = {1: "b", 2: "h", 4: "i", 8: "q"} 115 | 116 | audiofile = wave.open(fn, "r") 117 | params = audiofile.getparams() 118 | sampwidth = params[1] 119 | nframes = params[3] 120 | waveData = audiofile.readframes(nframes) 121 | 122 | sampleCode = sampWidthDict[sampwidth] 123 | newSampleCode = sampWidthDict[newSampleWidth] 124 | 125 | audioFrameList = struct.unpack("<" + sampleCode * nframes, waveData) 126 | outputByteStr = struct.pack("<" + newSampleCode * nframes, *audioFrameList) 127 | 128 | if newSampleWidth is not None: 129 | params = ( 130 | list(params[:2]) 131 | + [ 132 | newSampleWidth, 133 | ] 134 | + list(params[3:]) 135 | ) 136 | params = tuple(params) 137 | 138 | outputAudiofile = wave.open(outputFN, "w") 139 | outputAudiofile.setparams(params) 140 | outputAudiofile.writeframes(outputByteStr) 141 | 142 | 143 | def monoToStereo(fnL, fnR, outputFN, lfactor=1.0, rfactor=1.0): 144 | """ 145 | Given two audio files, combines them into a stereo audio file 146 | 147 | Derived mostly from the official python documentation 148 | https://docs.python.org/2/library/audioop.html 149 | """ 150 | 151 | def _monoToStereo(fn, leftBalance, rightBalance): 152 | audiofile = wave.open(fn, "r") 153 | params = audiofile.getparams() 154 | sampwidth = params[1] 155 | nframes = params[3] 156 | 157 | waveData = audiofile.readframes(nframes) 158 | sample = audioop.tostereo(waveData, sampwidth, leftBalance, rightBalance) 159 | 160 | return sample, params 161 | 162 | lsample, params = _monoToStereo(fnL, lfactor, 1 - lfactor) 163 | rsample = _monoToStereo(fnR, 1 - rfactor, rfactor)[0] 164 | 165 | sampwidth, framerate, nframes, comptype, compname = params[1:] 166 | 167 | stereoSamples = audioop.add(lsample, rsample, sampwidth) 168 | 169 | outputAudiofile = wave.open(outputFN, "w") 170 | 171 | params = [2, sampwidth, framerate, nframes, comptype, compname] 172 | outputAudiofile.setparams(params) 173 | outputAudiofile.writeframes(stereoSamples) 174 | 175 | 176 | def splitStereoAudio(path, fn, outputPath=None): 177 | if outputPath is None: 178 | outputPath = join(path, "split_audio") 179 | 180 | if not os.path.exists(outputPath): 181 | os.mkdir(outputPath) 182 | 183 | name = os.path.splitext(fn)[0] 184 | 185 | fnFullPath = join(path, fn) 186 | leftOutputFN = join(outputPath, "%s_L.wav" % name) 187 | rightOutputFN = join(outputPath, "%s_R.wav" % name) 188 | 189 | audiofile = wave.open(fnFullPath, "r") 190 | 191 | params = audiofile.getparams() 192 | sampwidth = params[1] 193 | nframes = params[3] 194 | audioFrames = audiofile.readframes(nframes) 195 | 196 | for leftFactor, rightFactor, outputFN in ( 197 | (1, 0, leftOutputFN), 198 | (0, 1, rightOutputFN), 199 | ): 200 | monoAudioFrames = audioop.tomono( 201 | audioFrames, sampwidth, leftFactor, rightFactor 202 | ) 203 | params = tuple( 204 | [ 205 | 1, 206 | ] 207 | + list(params[1:]) 208 | ) 209 | 210 | 
outputAudiofile = wave.open(outputFN, "w") 211 | outputAudiofile.setparams(params) 212 | outputAudiofile.writeframes(monoAudioFrames) 213 | 214 | 215 | def getSubwav(fn, startT, endT, singleChannelFlag): 216 | audiofile = wave.open(fn, "r") 217 | 218 | params = audiofile.getparams() 219 | nchannels = params[0] 220 | sampwidth = params[1] 221 | framerate = params[2] 222 | 223 | # Extract the audio frames 224 | audiofile.setpos(int(framerate * startT)) 225 | audioFrames = audiofile.readframes(int(framerate * (endT - startT))) 226 | 227 | # Convert to single channel if needed 228 | if singleChannelFlag is True and nchannels > 1: 229 | audioFrames = audioop.tomono(audioFrames, sampwidth, 1, 0) 230 | nchannels = 1 231 | 232 | return audioFrames 233 | 234 | 235 | def extractSubwav(fn, outputFN, startT, endT, singleChannelFlag): 236 | audiofile = wave.open(fn, "r") 237 | params = audiofile.getparams() 238 | nchannels = params[0] 239 | sampwidth = params[1] 240 | framerate = params[2] 241 | comptype = params[4] 242 | compname = params[5] 243 | 244 | print([fn, startT, endT]) 245 | audioFrames = getSubwav(fn, startT, endT, singleChannelFlag) 246 | 247 | if singleChannelFlag is True and nchannels > 1: 248 | nchannels = 1 249 | 250 | outParams = [nchannels, sampwidth, framerate, len(audioFrames), comptype, compname] 251 | 252 | outWave = wave.open(outputFN, "w") 253 | outWave.setparams(outParams) 254 | outWave.writeframes(audioFrames) 255 | -------------------------------------------------------------------------------- /pyacoustics/signals/data_fitting.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jul 6, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | from sklearn import mixture 8 | 9 | from scipy import stats 10 | from scipy.stats import norm 11 | import matplotlib.pyplot as plot 12 | import matplotlib.mlab 13 | import numpy as np 14 | 15 | 16 | def getPDF(ddata, numSamples=50, minV=None, maxV=None): 17 | pdf = stats.gaussian_kde(ddata) 18 | 19 | if minV is None: 20 | minV = min(ddata) 21 | if maxV is None: 22 | maxV = max(ddata) 23 | 24 | xValues = np.linspace(minV, maxV, numSamples) 25 | 26 | yValues = pdf(xValues) 27 | 28 | return xValues, yValues 29 | 30 | 31 | def getBimodalValley(data, numSamples=100, doplot=True): 32 | """ 33 | Returns the smallest value between the peaks of a bimodal distribution 34 | """ 35 | 36 | # Build GMM, fit it to the data, and get GMM parameters 37 | # The two means are used as the start and end point of a our search 38 | # for the smallest value between the two distributions. 
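# For example, data pooled from two roughly normal clusters centered near
# 100 and 200 should give fitted means close to those peaks, and the KDE
# minimum searched between them is returned as the valley.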
39 | 40 | ncomp = 2 # Could be parameterized later if needed 41 | 42 | clf = mixture.GaussianMixture(n_components=ncomp, covariance_type="full") 43 | clf.fit( 44 | [ 45 | [ 46 | item, 47 | ] 48 | for item in data 49 | ] 50 | ) 51 | ml = clf.means_ 52 | wl = clf.weights_ 53 | cl = clf.covariances_ 54 | ms = [m[0] for m in ml] 55 | cs = [np.sqrt(c[0][0]) for c in cl] 56 | ws = [w for w in wl] 57 | 58 | # Find the smallest point in the pdf between the means 59 | startV = int(min(ms)) 60 | endV = int(max(ms)) 61 | 62 | pdfX, pdfY = getPDF(data, numSamples, startV, endV) 63 | minY = min(pdfY) 64 | minX = pdfX[[float(x) for x in pdfY].index(minY)] 65 | 66 | # Plot result if requested 67 | if doplot is True: 68 | histo = plot.hist(data, numSamples) 69 | for w, m, c in zip(ws, ms, cs): 70 | normedPDF = norm.pdf(histo[1], m, np.sqrt(c)) 71 | plot.plot(histo[1], w * normedPDF, linewidth=3) 72 | plot.plot(pdfX, pdfY, linewidth=2) 73 | plot.axvline(minX) 74 | plot.show() 75 | 76 | return minX 77 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/speech_detection/__init__.py -------------------------------------------------------------------------------- /pyacoustics/speech_detection/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jun 7, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import struct 8 | import wave 9 | import math 10 | 11 | from pyacoustics.signals import audio_scripts 12 | 13 | 14 | class EndOfAudioData(Exception): 15 | pass 16 | 17 | 18 | def getSoundFileDuration(fn): 19 | """ 20 | Returns the duration of a wav file (in seconds) 21 | """ 22 | audiofile = wave.open(fn, "r") 23 | 24 | params = audiofile.getparams() 25 | framerate = params[2] 26 | nframes = params[3] 27 | 28 | duration = float(nframes) / framerate 29 | return duration 30 | 31 | 32 | def openAudioFile(fn): 33 | audiofile = wave.open(fn, "r") 34 | 35 | params = audiofile.getparams() 36 | sampwidth = params[1] 37 | framerate = params[2] 38 | 39 | return audiofile, sampwidth, framerate 40 | 41 | 42 | def rms(audioFrameList): 43 | audioFrameList = [val**2 for val in audioFrameList] 44 | meanVal = sum(audioFrameList) / len(audioFrameList) 45 | return math.sqrt(meanVal) 46 | 47 | 48 | def overlapCheck(interval, cmprInterval, percentThreshold=0): 49 | """Checks whether two intervals overlap""" 50 | 51 | startTime, endTime = interval[0], interval[1] 52 | cmprStartTime, cmprEndTime = cmprInterval[0], cmprInterval[1] 53 | 54 | overlapTime = min(endTime, cmprEndTime) - max(startTime, cmprStartTime) 55 | overlapTime = max(0, overlapTime) 56 | overlapFlag = overlapTime > 0 57 | 58 | if percentThreshold > 0 and overlapFlag: 59 | totalTime = max(endTime, cmprEndTime) - min(startTime, cmprStartTime) 60 | percentOverlap = overlapTime / float(totalTime) 61 | 62 | overlapFlag = percentOverlap >= percentThreshold 63 | 64 | return overlapFlag 65 | 66 | 67 | def getMinMaxAmplitude(wavFN, stepSize, entryList=None): 68 | audiofile = openAudioFile(wavFN)[0] 69 | 70 | # By default, find the min and max amplitude for the whole file 71 | if entryList is None: 72 | stop = audio_scripts.getSoundFileDuration(wavFN) 73 | entryList = [ 74 | (0, stop), 75 | ] 76 | 77 | # Accumulate relevant energy values 78 | rmsList = [] 79 | for 
entry in entryList: 80 | start, stop = entry[0], entry[1] 81 | currentTime = start 82 | while currentTime < stop: 83 | rmsList.append(rmsNextFrames(audiofile, stepSize)) 84 | currentTime += stepSize 85 | 86 | # Return the min and max values 87 | minValue = min(rmsList) 88 | maxValue = max(rmsList) 89 | 90 | return minValue, maxValue 91 | 92 | 93 | def rmsNextFrames(audiofile, stepSize, normMinVal=None, normMaxVal=None): 94 | params = audiofile.getparams() 95 | sampwidth, framerate = params[1], params[2] 96 | 97 | numFrames = int(framerate * stepSize) 98 | waveData = audiofile.readframes(numFrames) 99 | 100 | if len(waveData) == 0: 101 | raise EndOfAudioData() 102 | 103 | actualNumFrames = int(len(waveData) / float(sampwidth)) 104 | audioFrameList = struct.unpack("<" + "h" * actualNumFrames, waveData) 105 | 106 | rmsEnergy = rms(audioFrameList) 107 | 108 | if normMinVal is not None and normMaxVal is not None: 109 | rmsEnergy = (rmsEnergy - normMinVal) / (normMaxVal - normMinVal) 110 | 111 | return rmsEnergy 112 | 113 | 114 | def mergeAdjacentEntries(entryList): 115 | i = 0 116 | while i < len(entryList) - 1: 117 | if entryList[i][1] == entryList[i + 1][0]: 118 | startEntry = entryList.pop(i) 119 | nextEntry = entryList.pop(i) 120 | 121 | entryList.insert(i, (startEntry[0], nextEntry[1])) 122 | else: 123 | i += 1 124 | 125 | return entryList 126 | 127 | 128 | def cropUnusedPortion(entry, start, stop): 129 | retEntryList = [] 130 | 131 | if entry[0] < start: 132 | retEntryList.append((entry[0], start)) 133 | 134 | if entry[1] > stop: 135 | retEntryList.append((stop, entry[1])) 136 | 137 | return retEntryList 138 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/naive_vad.py: -------------------------------------------------------------------------------- 1 | import wave 2 | 3 | from pyacoustics.speech_detection import common 4 | 5 | 6 | def _findNextEvent( 7 | sampleList, 8 | startTime, 9 | silenceThreshold, 10 | sampleFreq, 11 | stepSize, 12 | numSteps, 13 | findSilence=True, 14 | ): 15 | """ 16 | 17 | if findSilence=False then search for sound 18 | """ 19 | 20 | # Extract the audio frames 21 | i = 0 22 | currentSequenceNum = 0 23 | while currentSequenceNum < numSteps: 24 | currentTime = startTime + i * stepSize 25 | nextTime = startTime + (i + 1) * stepSize 26 | 27 | audioFrameList = sampleList[ 28 | int(round(currentTime * sampleFreq)) : int(round(nextTime * sampleFreq)) 29 | ] 30 | 31 | if len(audioFrameList) == 0: 32 | raise common.EndOfAudioData() 33 | 34 | rmsEnergy = common.rms(audioFrameList) 35 | 36 | if (findSilence is True and rmsEnergy < silenceThreshold) or ( 37 | findSilence is False and rmsEnergy > silenceThreshold 38 | ): 39 | currentSequenceNum += 1 40 | else: 41 | currentSequenceNum = 0 42 | i += 1 43 | 44 | endTime = startTime + (i - numSteps) * stepSize 45 | 46 | return endTime 47 | 48 | 49 | def naiveVAD( 50 | sampleList, silenceThreshold, sampleFreq, stepSize, numSteps, startTime=0.0 51 | ): 52 | endTime = _findNextEvent( 53 | sampleList, 54 | startTime, 55 | silenceThreshold, 56 | sampleFreq, 57 | stepSize, 58 | numSteps, 59 | findSilence=True, 60 | ) 61 | 62 | # Each iteration begins at a non-silence event and ends in a new 63 | # silence event (i.e. 
spans the interval of the non-silence) 64 | entryList = [] 65 | try: 66 | while True: 67 | startTime = _findNextEvent( 68 | sampleList, 69 | endTime, 70 | silenceThreshold, 71 | sampleFreq, 72 | stepSize, 73 | numSteps, 74 | findSilence=False, 75 | ) 76 | 77 | endTime = _findNextEvent( 78 | sampleList, 79 | startTime, 80 | silenceThreshold, 81 | sampleFreq, 82 | stepSize, 83 | numSteps, 84 | findSilence=True, 85 | ) 86 | entryList.append((startTime, endTime)) 87 | 88 | except (common.EndOfAudioData, wave.Error): 89 | pass  # Stop processing 90 | 91 | return entryList 92 | 93 | 94 | def getIntensityPercentile(sampleList, cutoffPercent): 95 | """ 96 | Returns the nth percentile of the energy values in a dataset 97 | """ 98 | tmpSampleList = sorted(sampleList) 99 | 100 | return tmpSampleList[int(len(tmpSampleList) * cutoffPercent)] 101 | 102 | 103 | def cropSilenceInEdges(sampleList, silenceThreshold, sampleFreq): 104 | """ 105 | Returns the left and right boundaries of the meaningful data in a wav file 106 | """ 107 | startI = 0 108 | while sampleList[startI] < silenceThreshold: 109 | startI += 1 110 | 111 | endI = len(sampleList) - 1 112 | while sampleList[endI] < silenceThreshold: 113 | endI -= 1 114 | 115 | startTime = startI / sampleFreq 116 | endTime = endI / sampleFreq 117 | 118 | return startTime, endTime 119 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/naive_vad_efficient.py: -------------------------------------------------------------------------------- 1 | import math 2 | import struct 3 | import wave 4 | 5 | from pyacoustics.speech_detection import common 6 | 7 | 8 | def findNextEvent( 9 | fn, startTime, silenceThreshold, stepSize, numSteps, findSilence=True 10 | ): 11 | """ 12 | 13 | if findSilence=False then search for sound 14 | """ 15 | 16 | audiofile, sampwidth, framerate = common.openAudioFile(fn) 17 | 18 | # Extract the audio frames 19 | i = 0 20 | currentSequenceNum = 0 21 | audiofile.setpos(int(framerate * startTime)) 22 | while currentSequenceNum < numSteps: 23 | numFrames = int(framerate * stepSize) 24 | waveData = audiofile.readframes(numFrames) 25 | 26 | if len(waveData) == 0: 27 | raise common.EndOfAudioData() 28 | 29 | actualNumFrames = int(len(waveData) / float(sampwidth)) 30 | audioFrameList = struct.unpack("<" + "h" * actualNumFrames, waveData) 31 | 32 | rmsEnergy = common.rms(audioFrameList) 33 | print(rmsEnergy) 34 | 35 | if (findSilence is True and rmsEnergy < silenceThreshold) or ( 36 | findSilence is False and rmsEnergy > silenceThreshold 37 | ): 38 | currentSequenceNum += 1 39 | else: 40 | currentSequenceNum = 0 41 | i += 1 42 | 43 | endTime = startTime + (i - numSteps) * stepSize 44 | 45 | return endTime 46 | 47 | 48 | def naiveVAD(wavFN, silenceThreshold, stepSize, numSteps, startTime=0.0): 49 | endTime = findNextEvent( 50 | wavFN, startTime, silenceThreshold, stepSize, numSteps, findSilence=True 51 | ) 52 | 53 | # Each iteration begins at a non-silence event and ends in a new 54 | # silence event (i.e.
spans the interval of the non-silence) 55 | entryList = [] 56 | try: 57 | while True: 58 | startTime = findNextEvent( 59 | wavFN, endTime, silenceThreshold, stepSize, numSteps, findSilence=False 60 | ) 61 | 62 | endTime = findNextEvent( 63 | wavFN, startTime, silenceThreshold, stepSize, numSteps, findSilence=True 64 | ) 65 | entryList.append((startTime, endTime)) 66 | 67 | except (common.EndOfAudioData, wave.Error): 68 | pass # Stop processing 69 | 70 | return entryList 71 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/segment_stereo_speech.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Nov 4, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | from pyacoustics.speech_detection import common 8 | 9 | 10 | def findNextSpeaker( 11 | leftSamples, 12 | rightSamples, 13 | samplingFreq, 14 | startTime, 15 | analyzeStop, 16 | stepSize, 17 | numSteps, 18 | findLeft=True, 19 | ): 20 | """""" 21 | 22 | # Extract the audio frames 23 | i = 0 24 | currentSequenceNum = 0 25 | while currentSequenceNum < numSteps: 26 | # Stop analyzing once we've reached the end of this interval 27 | currentTime = startTime + i * stepSize 28 | nextTime = startTime + ((i + 1) * stepSize) 29 | 30 | if nextTime > analyzeStop: 31 | raise common.EndOfAudioData() 32 | 33 | leftRMSEnergy = common.rms( 34 | leftSamples[int(currentTime * samplingFreq) : int(nextTime * samplingFreq)] 35 | ) 36 | rightRMSEnergy = common.rms( 37 | rightSamples[int(currentTime * samplingFreq) : int(nextTime * samplingFreq)] 38 | ) 39 | 40 | if (findLeft is True and leftRMSEnergy >= rightRMSEnergy) or ( 41 | findLeft is False and leftRMSEnergy <= rightRMSEnergy 42 | ): 43 | currentSequenceNum += 1 44 | else: 45 | currentSequenceNum = 0 46 | i += 1 47 | 48 | endTime = startTime + (i - numSteps) * stepSize 49 | 50 | return endTime 51 | 52 | 53 | def assignAudioEventsForEntries( 54 | leftSamples, 55 | rightSamples, 56 | samplingFreq, 57 | leftEntry, 58 | rightEntry, 59 | stepSize, 60 | speakerNumSteps, 61 | ): 62 | """ 63 | Start up and tear down function for assignAudioEvents() 64 | """ 65 | 66 | # Find the overlap interval and preserve the non-overlapped portions 67 | start = max(leftEntry[0], rightEntry[0]) 68 | stop = min(leftEntry[1], rightEntry[1]) 69 | 70 | leftEntryList = common.cropUnusedPortion(leftEntry, start, stop) 71 | rightEntryList = common.cropUnusedPortion(rightEntry, start, stop) 72 | 73 | # Determine who is speaking in overlapped portions 74 | tmpEntries = assignAudioEvents( 75 | leftSamples, rightSamples, samplingFreq, start, stop, stepSize, speakerNumSteps 76 | ) 77 | 78 | leftEntryList.extend(tmpEntries[0]) 79 | rightEntryList.extend(tmpEntries[1]) 80 | 81 | # Merge adjacent regions sharing a boundary, if any 82 | leftEntryList.sort() 83 | rightEntryList.sort() 84 | 85 | leftEntryList = common.mergeAdjacentEntries(leftEntryList) 86 | rightEntryList = common.mergeAdjacentEntries(rightEntryList) 87 | 88 | return leftEntryList, rightEntryList 89 | 90 | 91 | def assignAudioEvents( 92 | leftSamples, 93 | rightSamples, 94 | samplingFreq, 95 | startTime, 96 | analyzeStop, 97 | stepSize, 98 | speakerNumSteps, 99 | ): 100 | findLeft = True 101 | leftEntryList = [] 102 | rightEntryList = [] 103 | try: 104 | while True: 105 | endTime = findNextSpeaker( 106 | leftSamples, 107 | rightSamples, 108 | samplingFreq, 109 | startTime, 110 | analyzeStop, 111 | stepSize, 112 | speakerNumSteps, 113 | findLeft, 114 | ) 115 | 116 | if 
endTime > analyzeStop: 117 | endTime = analyzeStop 118 | 119 | if startTime != endTime: 120 | entry = (startTime, endTime) 121 | if findLeft: 122 | leftEntryList.append(entry) 123 | else: 124 | rightEntryList.append(entry) 125 | 126 | print("%f, %f, %f" % (startTime, endTime, analyzeStop)) 127 | startTime = endTime 128 | findLeft = not findLeft 129 | 130 | except common.EndOfAudioData: # Stop processing 131 | if analyzeStop - startTime > stepSize * speakerNumSteps: 132 | finalEntry = (startTime, analyzeStop) 133 | if findLeft: 134 | leftEntryList.append(finalEntry) 135 | else: 136 | rightEntryList.append(finalEntry) 137 | 138 | return leftEntryList, rightEntryList 139 | 140 | 141 | def autosegmentStereoAudio( 142 | leftSamples, 143 | rightSamples, 144 | samplingFreq, 145 | leftEntryList, 146 | rightEntryList, 147 | stepSize, 148 | speakerNumSteps, 149 | ): 150 | overlapThreshold = 0 151 | overlapCheck = lambda entry, entryList: [ 152 | not common.overlapCheck(entry, cmprEntry, overlapThreshold) 153 | for cmprEntry in entryList 154 | ] 155 | 156 | # First add all of the entries with no overlap 157 | newLeftEntryList = [] 158 | for leftEntry in leftEntryList: 159 | if all(overlapCheck(leftEntry, rightEntryList)): 160 | newLeftEntryList.append(leftEntry) 161 | 162 | newRightEntryList = [] 163 | for rightEntry in rightEntryList: 164 | if all(overlapCheck(rightEntry, leftEntryList)): 165 | newRightEntryList.append(rightEntry) 166 | 167 | # For all entries with overlap, split them by speaker 168 | # Utilizing the left channel as a base, this chunks through all overlapping 169 | # in a single pass of the left channel, until there are no more overlapping 170 | # segments between the right and left channels. 171 | i = 0 172 | while i < len(leftEntryList): 173 | # Check if there are any segments in the right channel that overlap 174 | # with the current segment in the left channel. If not, move to 175 | # the next segment. 
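# (E.g., hypothetically, left (0.0, 5.0) vs right (3.0, 8.0): only the
# shared span (3.0, 5.0) is re-assigned below by comparing channel
# energies, while (0.0, 3.0) and (5.0, 8.0) survive via cropUnusedPortion.)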
176 | leftEntry = leftEntryList[i] 177 | overlapCheckList = overlapCheck(leftEntry, rightEntryList) 178 | if all(overlapCheckList): 179 | i += 1 180 | continue 181 | 182 | # Otherwise, resolve the first segment in the right channel that 183 | # overlaps with the current segment 184 | leftEntry = leftEntryList.pop(i) 185 | 186 | j = overlapCheckList.index(False) # Find the first overlap 187 | rightEntry = rightEntryList.pop(j) 188 | 189 | entryTuple = assignAudioEventsForEntries( 190 | leftSamples, 191 | rightSamples, 192 | samplingFreq, 193 | leftEntry, 194 | rightEntry, 195 | stepSize, 196 | speakerNumSteps, 197 | ) 198 | tmpLeftEntryList, tmpRightEntryList = entryTuple 199 | 200 | leftEntryList[i:i] = tmpLeftEntryList 201 | rightEntryList[j:j] = tmpRightEntryList 202 | 203 | # Combine the original non-overlapping segments with the adjusted segments 204 | newLeftEntryList.extend(leftEntryList) 205 | newRightEntryList.extend(rightEntryList) 206 | 207 | newLeftEntryList.sort() 208 | newRightEntryList.sort() 209 | 210 | newLeftEntryList = [ 211 | entry 212 | for entry in newLeftEntryList 213 | if (entry[1] - entry[0] > stepSize * speakerNumSteps) 214 | ] 215 | newRightEntryList = [ 216 | entry 217 | for entry in newRightEntryList 218 | if (entry[1] - entry[0] > stepSize * speakerNumSteps) 219 | ] 220 | 221 | return newLeftEntryList, newRightEntryList 222 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/segment_stereo_speech_efficient.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Nov 4, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | from pyacoustics.speech_detection import common 8 | 9 | 10 | def findNextSpeaker( 11 | leftFN, 12 | rightFN, 13 | startTime, 14 | analyzeStop, 15 | stepSize, 16 | numSteps, 17 | findLeft=True, 18 | leftMin=None, 19 | leftMax=None, 20 | rightMin=None, 21 | rightMax=None, 22 | ): 23 | """""" 24 | 25 | audioTuple = common.openAudioFile(leftFN) 26 | leftAudioFile = audioTuple[0] 27 | framerate = audioTuple[2] 28 | rightAudioFile = common.openAudioFile(rightFN)[0] 29 | 30 | # Extract the audio frames 31 | i = 0 32 | currentSequenceNum = 0 33 | leftAudioFile.setpos(int(framerate * startTime)) 34 | rightAudioFile.setpos(int(framerate * startTime)) 35 | while currentSequenceNum < numSteps: 36 | # Stop analyzing once we've reached the end of this interval 37 | currentTime = startTime + i * stepSize 38 | 39 | if currentTime >= analyzeStop: 40 | raise common.EndOfAudioData() 41 | 42 | leftRMSEnergy = common.rmsNextFrames(leftAudioFile, stepSize, leftMin, leftMax) 43 | rightRMSEnergy = common.rmsNextFrames( 44 | rightAudioFile, stepSize, rightMin, rightMax 45 | ) 46 | 47 | if (findLeft is True and leftRMSEnergy >= rightRMSEnergy) or ( 48 | findLeft is False and leftRMSEnergy <= rightRMSEnergy 49 | ): 50 | currentSequenceNum += 1 51 | else: 52 | currentSequenceNum = 0 53 | i += 1 54 | 55 | endTime = startTime + (i - numSteps) * stepSize 56 | 57 | return endTime 58 | 59 | 60 | def assignAudioEventsForEntries( 61 | leftFN, 62 | rightFN, 63 | leftEntry, 64 | rightEntry, 65 | stepSize, 66 | speakerNumSteps, 67 | leftMin, 68 | leftMax, 69 | rightMin, 70 | rightMax, 71 | ): 72 | """ 73 | Start up and tear down function for assignAudioEvents() 74 | """ 75 | 76 | # Find the overlap interval and preserve the non-overlapped portions 77 | start = max(leftEntry[0], rightEntry[0]) 78 | stop = min(leftEntry[1], rightEntry[1]) 79 | 80 | leftEntryList = 
common.cropUnusedPortion(leftEntry, start, stop) 81 | rightEntryList = common.cropUnusedPortion(rightEntry, start, stop) 82 | 83 | # Determine who is speaking in overlapped portions 84 | tmpEntries = assignAudioEvents( 85 | leftFN, 86 | rightFN, 87 | start, 88 | stop, 89 | stepSize, 90 | speakerNumSteps, 91 | leftMin, 92 | leftMax, 93 | rightMin, 94 | rightMax, 95 | ) 96 | 97 | leftEntryList.extend(tmpEntries[0]) 98 | rightEntryList.extend(tmpEntries[1]) 99 | 100 | # Merge adjacent regions sharing a boundary, if any 101 | leftEntryList.sort() 102 | rightEntryList.sort() 103 | 104 | leftEntryList = common.mergeAdjacentEntries(leftEntryList) 105 | rightEntryList = common.mergeAdjacentEntries(rightEntryList) 106 | 107 | return leftEntryList, rightEntryList 108 | 109 | 110 | def assignAudioEvents( 111 | leftFN, 112 | rightFN, 113 | startTime, 114 | analyzeStop, 115 | stepSize, 116 | speakerNumSteps, 117 | leftMin, 118 | leftMax, 119 | rightMin, 120 | rightMax, 121 | ): 122 | findLeft = True 123 | leftEntryList = [] 124 | rightEntryList = [] 125 | try: 126 | while True: 127 | endTime = findNextSpeaker( 128 | leftFN, 129 | rightFN, 130 | startTime, 131 | analyzeStop, 132 | stepSize, 133 | speakerNumSteps, 134 | findLeft, 135 | leftMin, 136 | leftMax, 137 | rightMin, 138 | rightMax, 139 | ) 140 | 141 | if endTime > analyzeStop: 142 | endTime = analyzeStop 143 | 144 | if startTime != endTime: 145 | entry = (startTime, endTime) 146 | if findLeft: 147 | leftEntryList.append(entry) 148 | else: 149 | rightEntryList.append(entry) 150 | 151 | print("%f, %f, %f" % (startTime, endTime, analyzeStop)) 152 | startTime = endTime 153 | findLeft = not findLeft 154 | 155 | except common.EndOfAudioData: # Stop processing 156 | if analyzeStop - startTime > stepSize * speakerNumSteps: 157 | finalEntry = (startTime, analyzeStop) 158 | if findLeft: 159 | leftEntryList.append(finalEntry) 160 | else: 161 | rightEntryList.append(finalEntry) 162 | 163 | return leftEntryList, rightEntryList 164 | 165 | 166 | def autosegmentStereoAudio( 167 | leftFN, rightFN, leftEntryList, rightEntryList, stepSize, speakerNumSteps 168 | ): 169 | overlapThreshold = 0 170 | overlapCheck = lambda entry, entryList: [ 171 | not common.overlapCheck(entry, cmprEntry, overlapThreshold) 172 | for cmprEntry in entryList 173 | ] 174 | 175 | # Find the min and max intensity levels for normalizing later 176 | leftMin, leftMax = common.getMinMaxAmplitude(leftFN, stepSize, leftEntryList) 177 | rightMin, rightMax = common.getMinMaxAmplitude(rightFN, stepSize, rightEntryList) 178 | 179 | # First add all of the entries with no overlap 180 | newLeftEntryList = [] 181 | for leftEntry in leftEntryList: 182 | if all(overlapCheck(leftEntry, rightEntryList)): 183 | newLeftEntryList.append(leftEntry) 184 | 185 | newRightEntryList = [] 186 | for rightEntry in rightEntryList: 187 | if all(overlapCheck(rightEntry, leftEntryList)): 188 | newRightEntryList.append(rightEntry) 189 | 190 | # For all entries with overlap, split them by speaker 191 | # Utilizing the left channel as a base, this chunks through all overlapping 192 | # in a single pass of the left channel, until there are no more overlapping 193 | # segments between the right and left channels. 194 | i = 0 195 | while i < len(leftEntryList): 196 | # Check if there are any segments in the right channel that overlap 197 | # with the current segment in the left channel. If not, move to 198 | # the next segment. 
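                # Note that i is not incremented when an overlap is found:
                # after the overlapping pair is resolved, the sub-entries
                # spliced back in at position i are themselves re-checked
                # against the remaining right-channel entries on the next
                # pass through the loop.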
199 | leftEntry = leftEntryList[i] 200 | overlapCheckList = overlapCheck(leftEntry, rightEntryList) 201 | if all(overlapCheckList): 202 | i += 1 203 | continue 204 | 205 | # Otherwise, resolve the first segment in the right channel that 206 | # overlaps with the current segment 207 | leftEntry = leftEntryList.pop(i) 208 | 209 | j = overlapCheckList.index(False) # Find the first overlap 210 | rightEntry = rightEntryList.pop(j) 211 | 212 | entryTuple = assignAudioEventsForEntries( 213 | leftFN, 214 | rightFN, 215 | leftEntry, 216 | rightEntry, 217 | stepSize, 218 | speakerNumSteps, 219 | leftMin, 220 | leftMax, 221 | rightMin, 222 | rightMax, 223 | ) 224 | tmpLeftEntryList, tmpRightEntryList = entryTuple 225 | 226 | leftEntryList[i:i] = tmpLeftEntryList 227 | rightEntryList[j:j] = tmpRightEntryList 228 | 229 | # Combine the original non-overlapping segments with the adjusted segments 230 | newLeftEntryList.extend(leftEntryList) 231 | newRightEntryList.extend(rightEntryList) 232 | 233 | newLeftEntryList.sort() 234 | newRightEntryList.sort() 235 | 236 | newLeftEntryList = [ 237 | entry 238 | for entry in newLeftEntryList 239 | if (entry[1] - entry[0] > stepSize * speakerNumSteps) 240 | ] 241 | newRightEntryList = [ 242 | entry 243 | for entry in newRightEntryList 244 | if (entry[1] - entry[0] > stepSize * speakerNumSteps) 245 | ] 246 | 247 | return newLeftEntryList, newRightEntryList 248 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/split_on_tone.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Sep 6, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | import math 10 | 11 | from pyacoustics.signals import audio_scripts 12 | from pyacoustics.utilities import sequences 13 | 14 | BEEP = "beep" 15 | SILENCE = "silence" 16 | SPEECH = "speech" 17 | 18 | 19 | def _homogenizeList(dataList, toneFrequency): 20 | """ 21 | Discritizes pitch values into one of three categories 22 | """ 23 | 24 | minVal = min(dataList) 25 | 26 | retDataList = [] 27 | for val in dataList: 28 | if val == toneFrequency: 29 | val = BEEP 30 | elif val == minVal: 31 | val = SILENCE 32 | else: 33 | val = SPEECH 34 | retDataList.append(val) 35 | 36 | return retDataList 37 | 38 | 39 | def splitFileOnTone(pitchList, timeStep, toneFrequency, eventDurationThreshold): 40 | """ 41 | Splits files by pure tones 42 | """ 43 | toneFrequency = int(round(toneFrequency, -1)) 44 | 45 | roundedPitchList = [int(round(val, -1)) for val in pitchList] 46 | codedPitchList = _homogenizeList(roundedPitchList, toneFrequency) 47 | 48 | compressedList = sequences.compressList(codedPitchList) 49 | timeDict = sequences.compressedListTransform( 50 | compressedList, 1.0 / timeStep, eventDurationThreshold 51 | ) 52 | 53 | # Fill in with empty lists if it didn't appear in the dataset 54 | # (eg no beeps were detected or no speech occurred) 55 | for key in [BEEP, SPEECH, SILENCE]: 56 | if key not in timeDict: 57 | timeDict[key] = [] 58 | 59 | return timeDict 60 | 61 | 62 | def extractSubwavs(timeDict, path, fn, outputPath): 63 | """ 64 | Extracts segments between tones marked in the output of splitFileOnTone() 65 | """ 66 | name = os.path.splitext(fn)[0] 67 | 68 | duration = audio_scripts.getSoundFileDuration(join(path, fn)) 69 | beepEntryList = timeDict[BEEP] 70 | segmentEntryList = sequences.invertIntervalList(beepEntryList, 0, duration) 71 | 72 | if len(segmentEntryList) > 0: 73 | numZeroes = 
int(math.floor(math.log10(len(segmentEntryList)))) + 1 74 | else: 75 | numZeroes = 1 76 | 77 | strFmt = "%%s_%%0%dd.wav" % numZeroes # e.g. '%s_%02d.wav' 78 | 79 | for i, entry in enumerate(segmentEntryList): 80 | start, stop = entry[:2] 81 | 82 | audio_scripts.extractSubwav( 83 | join(path, fn), 84 | join(outputPath, strFmt % (name, i)), 85 | startT=float(start), 86 | endT=float(stop), 87 | singleChannelFlag=True, 88 | ) 89 | -------------------------------------------------------------------------------- /pyacoustics/speech_detection/textgrids.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Nov 5, 2014 3 | 4 | @author: tmahrt 5 | 6 | Textgrid utilities for saving the output of speech detection code into 7 | praat textgrids. 8 | """ 9 | 10 | from praatio import textgrid 11 | 12 | 13 | def outputTextgrid(outputFN, duration, entryList, tierName): 14 | # Give all entries a label indicating their order of occurrence 15 | entryList.sort() 16 | newEntryList = [(entry[0], entry[1], str(i)) for i, entry in enumerate(entryList)] 17 | 18 | # Output textgrid 19 | tierSpeech = textgrid.IntervalTier(tierName, newEntryList, 0, duration) 20 | 21 | tg = textgrid.Textgrid() 22 | tg.addTier(tierSpeech) 23 | tg.save(outputFN, format="short_textgrid", includeBlankSpaces=True) 24 | 25 | 26 | def outputStereoTextgrid( 27 | outputFN, duration, leftEntryList, rightEntryList, leftChannelName, rightChannelName 28 | ): 29 | # Give all entries a label indicating their order of occurrence 30 | leftEntryList.sort() 31 | newLeftEntryList = [ 32 | (entry[0], entry[1], str(i)) for i, entry in enumerate(leftEntryList) 33 | ] 34 | 35 | rightEntryList.sort() 36 | newRightEntryList = [ 37 | (entry[0], entry[1], str(i)) for i, entry in enumerate(rightEntryList) 38 | ] 39 | 40 | # This shouldn't be necessary 41 | newLeftEntryList = [ 42 | entry 43 | for entry in newLeftEntryList 44 | if entry[1] <= duration and entry[0] < entry[1] 45 | ] 46 | newRightEntryList = [ 47 | entry 48 | for entry in newRightEntryList 49 | if entry[1] <= duration and entry[0] < entry[1] 50 | ] 51 | 52 | # Output textgrid 53 | leftTier = textgrid.IntervalTier(leftChannelName, newLeftEntryList, 0, duration) 54 | rightTier = textgrid.IntervalTier(rightChannelName, newRightEntryList, 0, duration) 55 | 56 | outputTG = textgrid.Textgrid() 57 | outputTG.addTier(leftTier) 58 | outputTG.addTier(rightTier) 59 | 60 | outputTG.save(outputFN, format="short_textgrid", includeBlankSpaces=True) 61 | -------------------------------------------------------------------------------- /pyacoustics/speech_filters/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 27, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | -------------------------------------------------------------------------------- /pyacoustics/speech_filters/speech_shaped_noise.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Mar 18, 2016 3 | 4 | @author: timmahrt 5 | 6 | *Preface: I'm not an expert in noise. What I've written here is just how I 7 | (naively) understand this topic. 8 | 9 | The following code is used for generating speech-shaped noise and masking 10 | speech using it. Speech-shaped noise is white noise with the same spectral 11 | properties as speech. As individual people have different spectral qualities, 12 | speech shaped noise should ideally be generated for each individual. 
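In rough terms, the noise is made by taking the Fourier transform of the
concatenated speech, keeping the magnitude spectrum, replacing the phase
with random values, and inverting the transform (see _noise_from_signal()
below).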
13 | 14 | The process: 15 | - first, speech shaped noise is generated for a speaker's 16 | recordings via generateNoise() 17 | - second, the speaker's data is then masked using the generated noise 18 | via maskSpeech() 19 | 20 | The alternative process: 21 | - if the files used to generate the noise are the same files that need to 22 | be masked, use the convenience function batchMaskSpeakerData() 23 | 24 | Some guidelines: 25 | - at least 3 minutes of speech should be used. If less than that is used, 26 | the noise may contain harmonic components 27 | - silences can exist in the input 28 | 29 | Requires scipy and numpy 30 | 31 | See the bottom of this file for an example usage. 32 | """ 33 | 34 | import os 35 | from os.path import join 36 | 37 | import functools 38 | import wave 39 | 40 | import numpy as np 41 | from scipy.io import wavfile 42 | from scipy import signal 43 | from numpy import fft 44 | 45 | ########################### 46 | # start of pambox code 47 | # Copyright (c) 2014, Alexandre Chabot-Leclerc 48 | # See LICENSE file for more information 49 | ########################### 50 | 51 | 52 | def _dbspl(x, ac=False, offset=0.0): 53 | """Computes RMS value of signal in dB. 54 | 55 | By default, a signal with an RMS value of 1 will have a level of 0 dB 56 | SPL. 57 | 58 | Parameters 59 | ---------- 60 | x : array_like 61 | Signal for which to caculate the sound-pressure level. 62 | ac : bool 63 | Consider only the AC component of the signal, i.e. the mean is 64 | removed (Default value = False) 65 | offset : float 66 | Reference to convert between RMS and dB SPL. (Default value = 0.0) 67 | axis : int 68 | Axis on which to compute the SPL value (Default value = -1, last axis) 69 | 70 | Returns 71 | ------- 72 | ndarray 73 | Sound-pressure levels. 74 | 75 | References 76 | ---------- 77 | .. [1] Auditory Modeling Toolbox, Peter L. Soendergaard 78 | B. C. J. Moore. An Introduction to the Psychology of Hearing. Academic 79 | Press, 5th edition, 2003. 80 | 81 | See also 82 | -------- 83 | setdbspl 84 | rms 85 | """ 86 | x = np.asarray(x) 87 | return 20.0 * np.log10(_rms(x, ac)) + float(offset) 88 | 89 | 90 | def _read_wav_as_float(path): 91 | """Reads a wavefile as a float. 92 | Parameters 93 | ---------- 94 | path : string 95 | Path to the wave file. 96 | Returns 97 | ------- 98 | wav : ndarray 99 | """ 100 | _, signal = wavfile.read(path) 101 | if np.issubdtype(signal.dtype, np.integer): 102 | # Integer division here. The '1.0' converts the numbers to float. 103 | return signal.T / (1.0 * np.abs(np.iinfo(signal.dtype).min)) 104 | return signal.T 105 | 106 | 107 | def _write_wav(fname, fs, x, normalize=False): 108 | """Writes floating point numpy array to 16 bit wavfile. 109 | 110 | Convenience wrapper around the scipy.io.wavfile.write function. 111 | 112 | The '.wav' extension is added to the file if it is not part of the 113 | filename string. 114 | 115 | Inputs of type `np.float` are converted to `int16` before writing to file. 116 | 117 | Parameters 118 | ---------- 119 | fname : string 120 | Filename with path. 121 | fs : int 122 | Sampling frequency. 123 | x : array_like 124 | Signal with the shape N_channels x Length 125 | normalize : bool 126 | Scale the signal such that its maximum value is one. 
127 | 128 | Returns 129 | ------- 130 | None 131 | 132 | """ 133 | # Make sure that the channels are the second dimension 134 | fs = np.int(fs) 135 | if not fname.endswith(".wav"): 136 | fname += ".wav" 137 | 138 | if x.shape[0] <= 2: 139 | x = x.T 140 | 141 | if np.issubdtype(x.dtype, np.float) and normalize: 142 | scaled = x / np.max(np.abs(x)) * (2**15 - 1) 143 | elif np.issubdtype(x.dtype, np.float): 144 | scaled = x * (2**15 - 1) 145 | else: 146 | scaled = x 147 | wavfile.write(fname, fs, scaled.astype("int16")) 148 | 149 | 150 | def _rms(x, ac=False, axis=-1): 151 | """Calculates the RMS value of a signal. 152 | 153 | Parameters 154 | ---------- 155 | x : array_like 156 | Signal. 157 | ac : bool 158 | Consider only the AC component of the signal. (Default value = False) 159 | axis : 160 | Axis on which to calculate the RMS value. The default is to calculate 161 | the RMS on the last dimensions, i.e. axis = -1. 162 | 163 | Returns 164 | ------- 165 | ndarray 166 | RMS value of the signal. 167 | 168 | """ 169 | x = np.asarray(x) 170 | if ac: 171 | if x.ndim > 1 and axis == -1: 172 | x_mean = x.mean(axis=axis)[..., np.newaxis] 173 | else: 174 | x_mean = x.mean(axis=axis) 175 | return np.linalg.norm((x - x_mean) / np.sqrt(x.shape[axis]), axis=axis) 176 | else: 177 | return np.linalg.norm(x / np.sqrt(x.shape[axis]), axis=axis) 178 | 179 | 180 | def _mix_noise(clean, noise, sent_level, snr=None): 181 | """Mix a signal signal noise at a given signal-to-noise ratio. 182 | 183 | Parameters 184 | ---------- 185 | clean : ndarray 186 | Clean signal. 187 | noise : ndarray 188 | Noise signal. 189 | sent_level : float 190 | Sentence level, in dB SPL. 191 | snr : 192 | Signal-to-noise ratio at which to mix the signals, in dB. If snr is 193 | `None`, no noise is mixed with the signal (Default value = None) 194 | 195 | Returns 196 | ------- 197 | tuple of ndarrays 198 | Returns the clean signal, the mixture, and the noise. 199 | 200 | """ 201 | 202 | # Pick a random section of the noise 203 | n_clean = len(clean) 204 | n_noise = len(noise) 205 | if n_noise > n_clean: 206 | start_idx = np.random.randint(n_noise - n_clean) 207 | noise = noise[start_idx : start_idx + n_clean] 208 | 209 | if snr is not None: 210 | # Get speech level and set noise level accordingly 211 | # clean_level = utils.dbspl(clean) 212 | # noise = utils.setdbspl(noise, clean_level - snr) 213 | noise = noise / _rms(noise) * 10 ** ((sent_level - snr) / 20) 214 | mix = clean + noise 215 | else: 216 | mix = clean 217 | 218 | return clean, mix, noise 219 | 220 | 221 | def _noise_from_signal(x, fs=40000, keep_env=False): 222 | """Create a noise with same spectrum as the input signal. 223 | 224 | Parameters 225 | ---------- 226 | x : array_like 227 | Input signal. 228 | fs : int 229 | Sampling frequency of the input signal. (Default value = 40000) 230 | keep_env : bool 231 | Apply the envelope of the original signal to the noise. (Default 232 | value = False) 233 | 234 | Returns 235 | ------- 236 | ndarray 237 | Noise signal. 238 | 239 | """ 240 | x = np.asarray(x) 241 | n_x = x.shape[-1] 242 | n_fft = next_pow_2(n_x) 243 | X = fft.rfft(x, next_pow_2(n_fft)) 244 | # Randomize phase. 
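    # Keep the magnitude spectrum but draw each phase uniformly at random,
    # i.e. N(f) = |X(f)| * exp(2*pi*j*u) with u ~ U[0, 1), so the noise has
    # (roughly) the same long-term spectrum as x but none of its temporal
    # structure. (Despite its name, noise_mag below holds the full complex
    # spectrum, not just the magnitude.)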
245 | noise_mag = np.abs(X) * np.exp(2 * np.pi * 1j * np.random.random(X.shape[-1])) 246 | noise = np.real(fft.irfft(noise_mag, n_fft)) 247 | out = noise[:n_x] 248 | 249 | if keep_env: 250 | env = np.abs(signal.hilbert(x)) 251 | [bb, aa] = signal.butter(6, 50 / (fs / 2)) # 50 Hz LP filter 252 | env = signal.filtfilt(bb, aa, env) 253 | out *= env 254 | 255 | return out 256 | 257 | 258 | def next_pow_2(x): 259 | """Calculates the next power of 2 of a number.""" 260 | return int(pow(2, np.ceil(np.log2(x)))) 261 | 262 | 263 | ########################### 264 | # end of pambox code 265 | ########################### 266 | 267 | 268 | class NotListException(Exception): 269 | def __str__(self): 270 | return "Error. First argument must be a list of file names." 271 | 272 | 273 | class InconsistentFramerateException(Exception): 274 | def __init__(self, wavFNList, framerateList): 275 | super(InconsistentFramerateException, self).__init__() 276 | 277 | self.framerateDict = {} 278 | 279 | framerateSet = list(set(framerateList)) 280 | for framerate in framerateSet: 281 | self.framerateDict[framerate] = [] 282 | 283 | for wavFN, framerate in zip(wavFNList, framerateList): 284 | self.framerateDict[framerate].append(wavFN) 285 | 286 | def __str__(self): 287 | outputStr = "Error. All wave files must have the same framerate" 288 | 289 | for framerate, fnList in self.framerateDict.items(): 290 | outputStr += "\n%s: %s" % (framerate, repr(fnList)) 291 | 292 | return outputStr 293 | 294 | 295 | def _getFramerate(wavFN): 296 | audiofile = wave.open(wavFN, "r") 297 | params = audiofile.getparams() 298 | 299 | return params[2] 300 | 301 | 302 | def _getDuration(waveFN): 303 | """ 304 | Returns the duration of a wav file (in seconds) 305 | """ 306 | audiofile = wave.open(waveFN, "r") 307 | 308 | params = audiofile.getparams() 309 | framerate = params[2] 310 | nframes = params[3] 311 | 312 | duration = float(nframes) / framerate 313 | return duration 314 | 315 | 316 | def _getMatchFunc(pattern): 317 | """ 318 | An unsophisticated pattern matching function 319 | """ 320 | 321 | # '#' Marks word boundaries, so if there is more than one we need to do 322 | # something special to make sure we're not mis-representings them 323 | assert pattern.count("#") < 2 324 | 325 | def startsWith(subStr, fullStr): 326 | return fullStr[: len(subStr)] == subStr 327 | 328 | def endsWith(subStr, fullStr): 329 | return fullStr[-1 * len(subStr) :] == subStr 330 | 331 | def inStr(subStr, fullStr): 332 | return subStr in fullStr 333 | 334 | # Selection of the correct function 335 | if pattern[0] == "#": 336 | pattern = pattern[1:] 337 | cmpFunc = startsWith 338 | 339 | elif pattern[-1] == "#": 340 | pattern = pattern[:-1] 341 | cmpFunc = endsWith 342 | 343 | else: 344 | cmpFunc = inStr 345 | 346 | return functools.partial(cmpFunc, pattern) 347 | 348 | 349 | def findFiles( 350 | path, 351 | filterPaths=False, 352 | filterExt=None, 353 | filterPattern=None, 354 | skipIfNameInList=None, 355 | stripExt=False, 356 | addPath=False, 357 | ): 358 | """ 359 | The primary use is to find files in a folder spoken by the same speaker 360 | 361 | Feed the input of findFiles into generateSpeechShapedNoise() as the first 362 | argument. 
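
    A sketch of typical use with generateNoise(), defined below
    (hypothetical paths):

        >>> wavFNList = findFiles('/home/data/speaker1', filterExt='.wav',
        ...                       addPath=True)
        >>> generateNoise(wavFNList, '/home/data/noise/speaker1_ssn.wav')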
363 |     """
364 |     fnList = os.listdir(path)
365 | 
366 |     if filterPaths is True:
367 |         fnList = [
368 |             folderName
369 |             for folderName in fnList
370 |             if os.path.isdir(os.path.join(path, folderName))
371 |         ]
372 | 
373 |     if filterExt is not None:
374 |         splitFNList = [
375 |             [
376 |                 fn,
377 |             ]
378 |             + list(os.path.splitext(fn))
379 |             for fn in fnList
380 |         ]
381 |         fnList = [fn for fn, name, ext in splitFNList if ext == filterExt]
382 | 
383 |     if filterPattern is not None:
384 |         splitFNList = [
385 |             [
386 |                 fn,
387 |             ]
388 |             + list(os.path.splitext(fn))
389 |             for fn in fnList
390 |         ]
391 |         matchFunc = _getMatchFunc(filterPattern)
392 |         fnList = [fn for fn, name, ext in splitFNList if matchFunc(name)]
393 | 
394 |     if skipIfNameInList is not None:
395 |         targetNameList = [os.path.splitext(fn)[0] for fn in skipIfNameInList]
396 |         fnList = [fn for fn in fnList if os.path.splitext(fn)[0] not in targetNameList]
397 | 
398 |     if stripExt is True:
399 |         fnList = [os.path.splitext(fn)[0] for fn in fnList]
400 | 
401 |     if addPath is True:
402 |         fnList = [join(path, fn) for fn in fnList]
403 | 
404 |     fnList.sort()
405 |     return fnList
406 | 
407 | 
408 | def generateNoise(inputFNList, outputFN, outputDuration=None):
409 |     """
410 |     Generates a file of random noise with the same spectrum as the input
411 | 
412 |     The input should contain at least 3 minutes of speech for best results.
413 |     Silences can exist within the speech. Multiple files can be considered
414 |     for one speech shaped noise generation.
415 | 
416 |     With less than 3 minutes, the speech shaped noise might contain
417 |     harmonic components.
418 | 
419 |     The output will have the same duration as the input, but if you don't need
420 |     such a long file, you can truncate the output.
421 |     """
422 | 
423 |     # Input must be a list
424 |     if not isinstance(inputFNList, list):
425 |         raise NotListException()
426 | 
427 |     # Verify that all files have the same framerate
428 |     framerateList = [_getFramerate(fn) for fn in inputFNList]
429 |     framerate = framerateList[0]
430 |     if not all([tmpFramerate == framerate for tmpFramerate in framerateList]):
431 |         raise InconsistentFramerateException(inputFNList, framerateList)
432 | 
433 |     outputPath = os.path.split(outputFN)[0]
434 |     if not os.path.exists(outputPath):
435 |         os.mkdir(outputPath)
436 | 
437 |     # Append the frames across all audio files
438 |     audioFrames = []
439 |     for fn in inputFNList:
440 |         audioFrames.extend(_read_wav_as_float(fn))
441 | 
442 |     # Get the speech shaped noise
443 |     # I'm not sure what the third argument does, but setting it
444 |     # to True makes the output sound horrible in my experience.
445 |     noiseFrames = _noise_from_signal(audioFrames, framerate, False)
446 | 
447 |     # Crop the file if specified by parameter /outputDuration/
448 |     if outputDuration is not None:
449 |         duration = len(noiseFrames) / framerate
450 |         if duration < outputDuration:
451 |             errMsg = (
452 |                 "Duration shorter than requested for file '%s'. Not cropping output."
453 |             )
454 |             print(errMsg % outputFN)
455 |         else:
456 |             noiseFrames = noiseFrames[: outputDuration * framerate]
457 | 
458 |     _write_wav(outputFN, framerate, noiseFrames, True)
459 | 
460 | 
461 | def maskSpeech(inputFN, noiseFN, outputFN, snr):
462 |     """
463 |     Mask the input file with the noise file at level snr (dB).
464 | 
465 |     The noise file can be generated with generateNoise()
466 | 
467 |     Interesting snr values, which increasingly distort the speech,
468 |     range from 3 down to -11. See Aubanel et al 2014 for more information.
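
    A sketch (hypothetical paths):

        >>> maskSpeech('/home/data/speech/utt01.wav',
        ...            '/home/data/noise/speaker1_ssn.wav',
        ...            '/home/data/masked/utt01.wav', snr=-3)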
469 |     """
470 | 
471 |     outputPath = os.path.split(outputFN)[0]
472 |     if not os.path.exists(outputPath):
473 |         os.mkdir(outputPath)
474 | 
475 |     audioFrames = _read_wav_as_float(inputFN)
476 |     noiseFrames = _read_wav_as_float(noiseFN)
477 |     clean_level = _dbspl(audioFrames)
478 |     framerate = _getFramerate(inputFN)
479 |     noiseFramerate = _getFramerate(noiseFN)
480 | 
481 |     if framerate != noiseFramerate:
482 |         raise InconsistentFramerateException([inputFN, noiseFN], [framerate, noiseFramerate])
483 | 
484 |     outputFrames = _mix_noise(audioFrames[:], noiseFrames[:], clean_level, snr)[1]
485 | 
486 |     print(outputFN)
487 |     _write_wav(outputFN, framerate, outputFrames, True)
488 | 
489 | 
490 | def batchMaskSpeakerData(
491 |     fnList, noiseProfileFN, outputPath, snrList, regenerateNoiseProfile=True
492 | ):
493 |     """
494 |     Given a set of speech from a single speaker, mask each file with noise
495 | 
496 |     Create the speech shaped noise by combining all the speech files.
497 | 
498 |     This is a convenience function that combines the functionality of
499 |     generateNoise() and maskSpeech()
500 |     """
501 | 
502 |     if not os.path.exists(outputPath):
503 |         os.mkdir(outputPath)
504 | 
505 |     # Generate the noise profile
506 |     if regenerateNoiseProfile is True or not os.path.exists(noiseProfileFN):
507 |         generateNoise(fnList, noiseProfileFN)
508 | 
509 |     # Mask the speech files
510 |     for snr in snrList:
511 |         snrOutputPath = join(outputPath, repr(snr))
512 |         if not os.path.exists(snrOutputPath):
513 |             os.mkdir(snrOutputPath)
514 | 
515 |         for fnFullPath in fnList:
516 |             fn = os.path.split(fnFullPath)[1]
517 |             maskSpeech(fnFullPath, noiseProfileFN, join(snrOutputPath, fn), snr)
518 | 
519 | 
520 | if __name__ == "__main__":
521 |     # Example usage
522 |     _inputPath = r"C:\Users\Tim\Desktop\cleaned_wavs"
523 | 
524 |     _noiseFN = r"C:\Users\Tim\Desktop\noise_profiles\amelia_ssn.wav"
525 |     _outputPath = r"C:\Users\Tim\Desktop\noise_filtered_speech"
526 | 
527 |     # You can easily filter each audio file with different snrs by using this
528 |     # list. Each will be output to an appropriately labeled subfolder of
529 |     # the output path
530 |     _snrList = [
531 |         -3,
532 |     ]
533 | 
534 |     # You can manually create a list or use this search function to find
535 |     # all of the files produced by the same speaker which you want to
536 |     # create a speech shaped noise for and which you subsequently want
537 |     # to mask using that noise.
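    # For reference, _mix_noise() scales the noise so that its level sits
    # snr dB below the speech level before the two signals are summed:
    #     noise *= 10 ** ((clean_level - snr) / 20.0) / rms(noise)
    # so an snr of -3 leaves the noise 3 dB louder than the speech.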
538 | _fnList = findFiles(_inputPath, filterExt=".wav", addPath=True) 539 | batchMaskSpeakerData(_fnList, _noiseFN, _outputPath, _snrList) 540 | -------------------------------------------------------------------------------- /pyacoustics/speech_rate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/speech_rate/__init__.py -------------------------------------------------------------------------------- /pyacoustics/speech_rate/dictionary_estimate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jan 28, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | 10 | from pyacoustics.utilities import utils 11 | from pysle import isletool 12 | 13 | 14 | def percentInside(startTime, endTime, cmprStartTime, cmprEndTime): 15 | if float(startTime) <= float(cmprEndTime) and float(endTime) >= float( 16 | cmprStartTime 17 | ): 18 | leftEdge = cmprStartTime - startTime 19 | rightEdge = endTime - cmprEndTime 20 | 21 | if leftEdge < 0: 22 | leftEdge = 0 23 | if rightEdge < 0: 24 | rightEdge = 0 25 | 26 | retVal = 1 - ((rightEdge + leftEdge)) / (endTime - startTime) 27 | 28 | # No overlap 29 | else: 30 | retVal = 0 31 | 32 | return retVal 33 | 34 | 35 | def manualPhoneCount(tgInfoPath, isleFN, outputPath, skipList=None): 36 | if skipList is None: 37 | skipList = [] 38 | 39 | utils.makeDir(outputPath) 40 | 41 | isleDict = isletool.LexicalTool(isleFN) 42 | 43 | existFNList = utils.findFiles(outputPath, filterPaths=".txt") 44 | for fn in utils.findFiles( 45 | tgInfoPath, filterExt=".txt", skipIfNameInList=existFNList 46 | ): 47 | if os.path.exists(join(outputPath, fn)): 48 | continue 49 | print(fn) 50 | 51 | dataList = utils.openCSV(tgInfoPath, fn) 52 | dataList = [row[2] for row in dataList] # start, stop, tmpLabel 53 | outputList = [] 54 | for tmpLabel in dataList: 55 | if tmpLabel not in skipList: 56 | syllableCount, phoneCount = isletool.getNumPhones( 57 | isleDict, tmpLabel, maxFlag=True 58 | ) 59 | else: 60 | syllableCount, phoneCount = 0, 0 61 | 62 | outputList.append("%d,%d" % (syllableCount, phoneCount)) 63 | 64 | outputTxt = "\n".join(outputList) 65 | 66 | with open(join(outputPath, fn), "w") as fd: 67 | fd.write(outputTxt) 68 | 69 | 70 | def manualPhoneCountForEpochs(manualCountsPath, tgInfoPath, epochPath, outputPath): 71 | utils.makeDir(outputPath) 72 | 73 | skipList = utils.findFiles(outputPath, filterExt=".txt") 74 | for fn in utils.findFiles(tgInfoPath, filterExt=".txt", skipIfNameInList=skipList): 75 | epochList = utils.openCSV(epochPath, fn) 76 | tgInfo = utils.openCSV(tgInfoPath, fn) 77 | manualCounts = utils.openCSV(manualCountsPath, fn) 78 | 79 | epochOutputList = [] 80 | for epochTuple in epochList: # Epoch num, start, stop 81 | epochStart, epochStop = float(epochTuple[1]), float(epochTuple[2]) 82 | 83 | # Find all of the intervals that are at least partially 84 | # contained within the current epoch 85 | epochSyllableCount = 0 86 | epochPhoneCount = 0 87 | speechDuration = 0 88 | for info, counts in utils.safeZip( 89 | [tgInfo, manualCounts], enforceLength=True 90 | ): 91 | start, stop = float(info[0]), float(info[1]) 92 | syllableCount, phoneCount = float(counts[0]), float(counts[1]) 93 | 94 | # Accounts for intervals that straddle an epoch boundary 95 | multiplicationFactor = percentInside(start, stop, epochStart, epochStop) 96 | 97 | 
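            # percentInside() gives the fraction of [start, stop] lying
            # inside the epoch, so an interval that is half inside the
            # epoch contributes half of its syllables, phones, and
            # duration to that epoch's totals.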
speechDuration += (stop - start) * multiplicationFactor 98 | 99 | epochSyllableCount += syllableCount * multiplicationFactor 100 | epochPhoneCount += phoneCount * multiplicationFactor 101 | 102 | epochOutputList.append( 103 | "%f,%f,%f" % (epochSyllableCount, epochPhoneCount, speechDuration) 104 | ) 105 | 106 | with open(join(outputPath, fn), "w") as fd: 107 | fd.write("\n".join(epochOutputList)) 108 | -------------------------------------------------------------------------------- /pyacoustics/speech_rate/uwe_sr.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on July 28, 2015 3 | 4 | @author: tmahrt 5 | 6 | This code estimates the speech rate of a speaker by using Uwe Reichel's matlab 7 | script for detecting syllable nuclei over some interval. 8 | """ 9 | 10 | from os.path import join 11 | 12 | from pyacoustics.utilities import utils 13 | from pyacoustics.utilities import matlab 14 | 15 | 16 | def findSyllableNuclei( 17 | inputPath, outputPath, matlabEXE, matlabScriptsPath, printCmd=False 18 | ): 19 | """ 20 | Makes a file listing the syllable nuclei for each file in inputPath 21 | """ 22 | utils.makeDir(outputPath) 23 | 24 | pathList = [matlabScriptsPath, join(matlabScriptsPath, "nucleus_detection_matlab")] 25 | cmd = "detect_syllable_nuclei('%s', '%s');" % (inputPath, outputPath) 26 | matlab.runMatlabFunction(cmd, matlabEXE, pathList, printCmd) 27 | 28 | 29 | def toAbsoluteTime(namePrefix, matlabOutputPath, startTimeList): 30 | """ 31 | Converts the sampled times from relative to absolute time 32 | 33 | The input may be split across a number of files. This script assumes 34 | that files of the pattern <><>.txt correspond 35 | to different parts of the same source file. 36 | 37 | namePrefix - name of the original wav file with no suffix 38 | speechRatePath - the path where the output of the matlab script is placed 39 | startTimeList - there needs to be one file here for each file in 40 | speechRatePath with the pattern namePrefix 41 | 42 | Returns a list of lists where each sublist corresponds to the output of 43 | one file matching <> 44 | """ 45 | # Load subset speech rate 46 | speechRateFNList = utils.findFiles( 47 | matlabOutputPath, filterExt=".txt", filterPattern=namePrefix 48 | ) 49 | 50 | returnList = [] 51 | for start, speechRateFN in utils.safeZip( 52 | [startTimeList, speechRateFNList], enforceLength=True 53 | ): 54 | speechRateList = utils.openCSV(matlabOutputPath, speechRateFN, valueIndex=0) 55 | speechRateList = [value for value in speechRateList if value != ""] 56 | speechRateList = [ 57 | str(float(start) + float(sampNum)) for sampNum in speechRateList 58 | ] 59 | 60 | returnList.append(speechRateList) 61 | 62 | return returnList 63 | 64 | 65 | def uweSyllableCountForInterval(startTime, stopTime, nucleiCenterList): 66 | countList = [ 67 | timestamp 68 | for timestamp in nucleiCenterList 69 | if timestamp >= startTime and timestamp <= stopTime 70 | ] 71 | 72 | return len(countList) 73 | -------------------------------------------------------------------------------- /pyacoustics/text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/text/__init__.py -------------------------------------------------------------------------------- /pyacoustics/text/frequency.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 
2 | 3 | from os.path import join 4 | from itertools import islice 5 | 6 | try: 7 | from itertools import izip as zip 8 | except: 9 | pass 10 | import math 11 | import io 12 | 13 | from pyacoustics.utilities import utils 14 | 15 | 16 | class CountCorpus(object): 17 | def __init__(self, frequencyDict, totalCount=None): 18 | """ 19 | A generic class for handling corpora. 20 | 21 | For large corpora you can save the totalCount somewhere and pass it 22 | in during instantiation. Otherwise, it will be calculated at 23 | runtime. 24 | """ 25 | self.frequencyDict = frequencyDict 26 | 27 | if totalCount is None: 28 | totalCount = self._getNumWords() 29 | self.totalCount = totalCount 30 | 31 | def getFrequency(self, word, normFunc=None, outOfDictionaryValue=None): 32 | try: 33 | count = self.frequencyDict[word] 34 | except KeyError: 35 | if outOfDictionaryValue is None: 36 | raise 37 | else: 38 | print("OOD Word: %s" % word) 39 | count = outOfDictionaryValue 40 | 41 | try: 42 | if normFunc is None: 43 | freq = float(count) / self.totalCount 44 | else: 45 | freq = normFunc(count, self.totalCount) 46 | 47 | logFreq = math.log(float(count)) 48 | except ValueError: 49 | freq = "" 50 | logFreq = "" 51 | 52 | return count, freq, logFreq 53 | 54 | def _getNumWords(self): 55 | """ 56 | Gets the number of words in the corpus 57 | """ 58 | sumV = 0 59 | for word in self.frequencyDict.keys(): 60 | sumV += self.frequencyDict[word] 61 | 62 | return sumV 63 | 64 | 65 | class GoogleUnigram(CountCorpus): 66 | NUM_WORDS = 1024908267229.0 67 | 68 | def __init__(self, googleUnigram): 69 | # Load the corpus data 70 | frequencyDict = {} 71 | with open(googleUnigram, "r") as fd: 72 | data = fd.read() 73 | dataList = data.split() 74 | for word, count in zip( 75 | islice(dataList, 0, None, 2), islice(dataList, 1, None, 2) 76 | ): 77 | frequencyDict[word] = count 78 | 79 | super(GoogleUnigram, self).__init__(frequencyDict, GoogleUnigram.NUM_WORDS) 80 | 81 | 82 | class Switchboard(CountCorpus): 83 | NUM_WORDS = 1456224.0 84 | 85 | def __init__(self, switchboardCounts): 86 | # Load the corpus 87 | frequencyDict = {} 88 | with open(switchboardCounts, "r") as fd: 89 | data = fd.read() 90 | 91 | dataList = data.split("\n") 92 | dataList = [ 93 | row[1:-2].strip() for row in dataList if len(row) > 2 and row[0] != ";" 94 | ] 95 | dataList = [row.split(" ") for row in dataList] 96 | 97 | for row in dataList: 98 | word = row[0] 99 | count = row[-4] 100 | frequencyDict[word] = int(count) 101 | 102 | super(Switchboard, self).__init__(frequencyDict, Switchboard.NUM_WORDS) 103 | 104 | 105 | class SwitchboardTim(CountCorpus): 106 | NUM_WORDS = 1464017.0 107 | 108 | def __init__(self, switchboardCounts): 109 | frequencyDict = loadCountList(switchboardCounts) 110 | super(SwitchboardTim, self).__init__(frequencyDict, SwitchboardTim.NUM_WORDS) 111 | 112 | 113 | class Buckeye(CountCorpus): 114 | NUM_WORDS = 282575.0 # Not including words that start with '[' 115 | 116 | def __init__(self, buckeyeCounts): 117 | frequencyDict = loadCountList(buckeyeCounts) 118 | super(Buckeye, self).__init__(frequencyDict, Buckeye.NUM_WORDS) 119 | 120 | 121 | class Fischer(CountCorpus): 122 | NUM_WORDS = 21025946.0 123 | 124 | def __init__(self, fischerCounts): 125 | frequencyDict = loadCountList(fischerCounts) 126 | super(Fischer, self).__init__(frequencyDict, Fischer.NUM_WORDS) 127 | 128 | 129 | class Crea(CountCorpus): 130 | NUM_WORDS = 152554665 131 | 132 | def __init__(self, creaCounts): 133 | frequencyDict = loadCountList(creaCounts) 134 | super(Crea, 
self).__init__(frequencyDict, Crea.NUM_WORDS) 135 | 136 | 137 | class FrenchCorpus(CountCorpus): 138 | NUM_WORDS = None 139 | 140 | def __init__(self, frenchCounts): 141 | frequencyDict = loadCountList(frenchCounts) 142 | super(FrenchCorpus, self).__init__(frequencyDict, 0) 143 | 144 | 145 | def calcWordsPerMillion(count, totalCount): 146 | million = 1000000 147 | assert totalCount > million 148 | return count * million / totalCount 149 | 150 | 151 | def loadFrenchList(fnFullPath, outputFullPath): 152 | with io.open(fnFullPath, "r", encoding="utf-8") as fd: 153 | data = fd.read() 154 | frequencyDict = {} 155 | 156 | dataList = data.splitlines() 157 | dataList = [row.rsplit(",") for row in dataList[1:]] 158 | dataList = [(rowList[0], float(rowList[6])) for rowList in dataList] 159 | 160 | # Some items appear multiple times but with different meanings 161 | countList = [dataList.pop(0)] 162 | for word, count in dataList: 163 | if word == countList[-1][0]: 164 | countList[-1] = (word, countList[-1][1] + count) 165 | else: 166 | countList.append((word, count)) 167 | 168 | countList = [",".join((word, str(count))) for word, count in countList] 169 | 170 | with io.open(outputFullPath, "w", encoding="utf-8") as fd: 171 | fd.write("\n".join(countList)) 172 | 173 | 174 | def loadCountList(fnFullPath): 175 | """ 176 | Loads counts from file that stores word counts in the form "word, count\n" 177 | """ 178 | with io.open(fnFullPath, "r", encoding="utf-8") as fd: 179 | data = fd.read() 180 | frequencyDict = {} 181 | 182 | dataList = data.split("\n") 183 | dataList = [row.rsplit(",", 1) for row in dataList] 184 | 185 | for word, count in dataList: 186 | frequencyDict[word] = float(count) 187 | 188 | return frequencyDict 189 | 190 | 191 | def findFrequenciesForWordLists(featurePath, countObj, frequencyNormFunc): 192 | frequencyPath = join(featurePath, "frequency") 193 | utils.makeDir(frequencyPath) 194 | 195 | wordsPath = join(featurePath, "words") 196 | 197 | for fn in utils.findFiles(wordsPath): 198 | wordList = utils.openCSV(wordsPath, fn, valueIndex=0, encoding="utf-8") 199 | countList = [] 200 | for word in wordList: 201 | tmp = countObj.getFrequency(word, frequencyNormFunc, outOfDictionaryValue=1) 202 | count, freq, logFreq = tmp 203 | countList.append("%f,%f,%f" % (count, freq, logFreq)) 204 | 205 | with open(join(frequencyPath, fn), "w") as fd: 206 | fd.write("\n".join(countList)) 207 | -------------------------------------------------------------------------------- /pyacoustics/text/transcript.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 20, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | from os.path import join 8 | 9 | import io 10 | 11 | from pyacoustics.utilities import utils 12 | 13 | 14 | def toWords(featurePath, outputPath): 15 | utils.makeDir(outputPath) 16 | 17 | transcriptPath = join(featurePath, "txt") 18 | 19 | for fn in utils.findFiles(transcriptPath, filterExt=".txt"): 20 | fnFullPath = join(transcriptPath, fn) 21 | with io.open(fnFullPath, "r", encoding="utf-8") as fd: 22 | data = fd.read() 23 | dataList = data.split() 24 | 25 | with io.open(join(outputPath, fn), "w", encoding="utf-8") as fd: 26 | fd.write("\n".join(dataList)) 27 | -------------------------------------------------------------------------------- /pyacoustics/textgrids/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/textgrids/__init__.py -------------------------------------------------------------------------------- /pyacoustics/textgrids/syllabify_textgrids.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 22, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | 10 | from praatio import textgrid 11 | from pysle import isletool 12 | from pysle import praattools 13 | 14 | 15 | from pyacoustics.utilities import utils 16 | 17 | 18 | def correctTextgridTimes(tgPath, threshold): 19 | # Are x and y unique but very very similar 20 | withinThreshold = lambda x, y: (abs(x - y) < threshold) and (x != y) 21 | 22 | outputPath = join(tgPath, "correctsTGs") 23 | utils.makeDir(outputPath) 24 | 25 | for fn in utils.findFiles(tgPath, filterExt=".TextGrid"): 26 | print(fn) 27 | tg = textgrid.openTextgrid(join(tgPath, fn), includeEmptyIntervals=False) 28 | wordTier = tg.tierDict["words"] 29 | phoneTier = tg.tierDict["phones"] 30 | 31 | for wordEntry in wordTier.entryList: 32 | for i, phoneEntry in enumerate(phoneTier.entryList): 33 | if textgrid.intervalOverlapCheck(wordEntry, phoneEntry): 34 | start = phoneEntry[0] 35 | end = phoneEntry[1] 36 | phone = phoneEntry[2] 37 | 38 | if withinThreshold(wordEntry[0], start): 39 | start = wordEntry[0] 40 | elif withinThreshold(wordEntry[1], start): 41 | start = wordEntry[1] 42 | elif withinThreshold(wordEntry[0], end): 43 | end = wordEntry[0] 44 | elif withinThreshold(wordEntry[1], end): 45 | end = wordEntry[1] 46 | 47 | phoneTier.entryList[i] = (start, end, phone) 48 | 49 | tg.save(join(outputPath, fn), format="short_textgrid", includeBlankSpaces=True) 50 | 51 | 52 | def syllabifyTextgrids(tgPath, islePath): 53 | isleDict = isletool.LexicalTool(islePath) 54 | 55 | outputPath = join(tgPath, "syllabifiedTGs") 56 | utils.makeDir(outputPath) 57 | skipLabelList = ["", "xx", "", "{B_TRANS}", "{E_TRANS}"] 58 | 59 | for fn in utils.findFiles(tgPath, filterExt=".TextGrid"): 60 | if os.path.exists(join(outputPath, fn)): 61 | continue 62 | 63 | tg = textgrid.openTextgrid(join(tgPath, fn)) 64 | 65 | syllableTG = praattools.syllabifyTextgrid( 66 | isleDict, tg, "words", "phones", skipLabelList=skipLabelList 67 | ) 68 | 69 | outputTG = textgrid.Textgrid() 70 | outputTG.addTier(tg.tierDict["words"]) 71 | outputTG.addTier(tg.tierDict["phones"]) 72 | # outputTG.addTier(syllableTG.tierDict["syllable"]) 73 | outputTG.addTier(syllableTG.tierDict["tonic"]) 74 | 75 | outputTG.save( 76 | join(outputPath, fn), format="short_textgrid", includeBlankSpaces=True 77 | ) 78 | 79 | 80 | if __name__ == "__main__": 81 | tmpISLEPath = "/Users/tmahrt/Dropbox/workspace/pysle/test/islev2.txt" 82 | # correctTextgridTimes(tgPath, 0.0025) 83 | 84 | tmpTGPath = join( 85 | "/Users/tmahrt/Desktop/experiments/LMEDS_studies", 86 | "RPT_English/features/tobi_textgrids/correctsTGs", 87 | ) 88 | syllabifyTextgrids(tmpTGPath, tmpISLEPath) 89 | -------------------------------------------------------------------------------- /pyacoustics/textgrids/textgrids.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 20, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | 10 | import io 11 | 12 | from praatio import textgrid 13 | 14 | from pyacoustics.utilities import utils 15 | 16 | 17 | def _navigateTGs(tgPath, name, tierName): 18 | """ 
19 | Converts a textgrid into a plain text format 20 | 21 | Each labels is output by the 22 | """ 23 | 24 | tg = textgrid.openTextgrid(join(tgPath, name + ".TextGrid")) 25 | tier = tg.tierDict[tierName] 26 | 27 | for start, stop, label in tier.entryList: 28 | if label.strip() == "": 29 | continue 30 | 31 | yield start, stop, label 32 | 33 | 34 | def extractTGInfo(inputPath, outputPath, tierName): 35 | utils.makeDir(outputPath) 36 | 37 | for name in utils.findFiles(inputPath, filterExt=".TextGrid", stripExt=True): 38 | if os.path.exists(join(outputPath, name + ".txt")): 39 | continue 40 | print(name) 41 | 42 | outputList = [] 43 | for start, stop, label in _navigateTGs(inputPath, name, tierName): 44 | outputList.append("%f,%f,%s" % (start, stop, label)) 45 | 46 | outputTxt = "\n".join(outputList) 47 | outputFN = join(outputPath, name + ".txt") 48 | with io.open(outputFN, "w", encoding="utf-8") as fd: 49 | fd.write(outputTxt) 50 | 51 | 52 | def extractTranscript(featurePath, tierName): 53 | """ 54 | Outputs each label of a textgrid on a separate line in a plain text file 55 | """ 56 | 57 | tgPath = join(featurePath, "textgrids") 58 | 59 | outputPath = join(featurePath, "transcript") 60 | utils.makeDir(outputPath) 61 | 62 | for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True): 63 | outputList = [] 64 | for entry in _navigateTGs(tgPath, name, tierName): 65 | label = entry[2] 66 | outputList.append("%s" % (label)) 67 | 68 | outputTxt = "\n".join(outputList) 69 | outputFN = join(outputPath, name + ".txt") 70 | with io.open(outputFN, "w", encoding="utf-8") as fd: 71 | fd.write(outputTxt) 72 | 73 | 74 | def extractWords(tgPath, tierName, outputPath): 75 | utils.makeDir(outputPath) 76 | 77 | for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True): 78 | outputList = [] 79 | for entry in _navigateTGs(tgPath, name, tierName): 80 | label = entry[2] 81 | for word in label.split(): 82 | outputList.append("%s" % (word)) 83 | 84 | outputTxt = "\n".join(outputList) 85 | outputFN = join(outputPath, name + ".txt") 86 | with io.open(outputFN, "w", encoding="utf-8") as fd: 87 | fd.write(outputTxt) 88 | -------------------------------------------------------------------------------- /pyacoustics/utilities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/pyacoustics/utilities/__init__.py -------------------------------------------------------------------------------- /pyacoustics/utilities/error_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jun 7, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import os 8 | 9 | 10 | class ApplicationNotFound(Exception): 11 | def __init__(self, applicationName): 12 | super(ApplicationNotFound, self).__init__() 13 | self.applicationName = applicationName 14 | 15 | def __str__(self): 16 | return "Application (%s) does not exist" % self.applicationName 17 | 18 | 19 | def checkForApplication(application): 20 | if not os.path.exists(application): 21 | raise ApplicationNotFound(application) 22 | -------------------------------------------------------------------------------- /pyacoustics/utilities/filters.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 20, 2014 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import math 8 | 9 | 10 | def medianFilter(dist, window, useEdgePadding): 11 | 
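    """
    Smooths a distribution by replacing each value with its window median.

    A sketch of the behavior:
        >>> medianFilter([1, 9, 1, 1, 1], 3, useEdgePadding=True)
        [1, 1, 1, 1, 1]

    If useEdgePadding is False, values closer than window // 2 to either
    edge are passed through unfiltered; otherwise the edge values are
    repeated to fill out the window.
    """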
offset = int(math.floor(window / 2.0)) 12 | length = len(dist) 13 | 14 | returnList = [] 15 | for x in range(length): 16 | dataToFilter = [] 17 | # If using edge padding or if 0 <= context <= length 18 | if useEdgePadding or (((0 <= x - offset) and (x + offset < length))): 19 | preContext = [] 20 | currentContext = [ 21 | dist[x], 22 | ] 23 | postContext = [] 24 | 25 | lastKnownLargeIndex = 0 26 | for y in range(1, offset + 1): # 1-based 27 | if x + y >= length: 28 | if lastKnownLargeIndex == 0: 29 | largeIndexValue = x 30 | else: 31 | largeIndexValue = lastKnownLargeIndex 32 | else: 33 | largeIndexValue = x + y 34 | lastKnownLargeIndex = x + y 35 | 36 | postContext.append(dist[largeIndexValue]) 37 | 38 | if x - y < 0: 39 | smallIndexValue = 0 40 | else: 41 | smallIndexValue = x - y 42 | 43 | preContext.insert(0, dist[smallIndexValue]) 44 | 45 | dataToFilter = preContext + currentContext + postContext 46 | value = _median(dataToFilter) 47 | else: 48 | value = dist[x] 49 | returnList.append(value) 50 | 51 | return returnList 52 | 53 | 54 | def _median(valList): 55 | valList = valList[:] 56 | valList.sort() 57 | 58 | if len(valList) % 2 == 0: # Even 59 | i = int(len(valList) / 2.0) 60 | medianVal = (valList[i - 1] + valList[i]) / 2.0 61 | else: # Odd 62 | i = int(len(valList) / 2.0) 63 | medianVal = valList[i] 64 | 65 | return medianVal 66 | -------------------------------------------------------------------------------- /pyacoustics/utilities/matlab.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jul 28, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import subprocess 8 | 9 | from pyacoustics.utilities import error_utils 10 | 11 | 12 | def runMatlabFunction(command, matlabEXE, matlabCodePathList, printCmd=False): 13 | error_utils.checkForApplication(matlabEXE) 14 | 15 | pathCode = "".join( 16 | ["addpath('%s');" % matlabCodePath for matlabCodePath in matlabCodePathList] 17 | ) 18 | exitCode = "exit;" 19 | 20 | codeSequence = pathCode + command + exitCode 21 | 22 | if printCmd is True: 23 | print(matlabEXE + ' -nosplash -nodesktop -r "%s"' % codeSequence) 24 | myProcess = subprocess.Popen( 25 | [matlabEXE, "-nosplash", "-nodesktop", "-r", codeSequence] 26 | ) 27 | if myProcess.wait(): 28 | exit() # Something has gone wrong (an error message should be printed) 29 | -------------------------------------------------------------------------------- /pyacoustics/utilities/my_math.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Apr 3, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import math 8 | 9 | 10 | def rms(intensityValues): 11 | intensityValues = [val**2 for val in intensityValues] 12 | meanVal = sum(intensityValues) / len(intensityValues) 13 | return math.sqrt(meanVal) 14 | 15 | 16 | def linspace(start, stop, n): 17 | if n == 1: 18 | return [ 19 | stop, 20 | ] 21 | h = (stop - start) / float(n - 1) 22 | return [start + h * i for i in range(n)] 23 | 24 | 25 | def orderOfMagnitude(val): 26 | return int(math.floor(math.log10(val))) 27 | -------------------------------------------------------------------------------- /pyacoustics/utilities/normalize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 16, 2012 3 | 4 | @author: timmahrt 5 | """ 6 | 7 | import math 8 | 9 | 10 | def _zscoreNormalize(raw, mean, stdDev): 11 | # No problems related to integers or 64-bit floats (which don't trigger 12 | # a divide by zero 
exception) 13 | raw, mean, stdDev = float(raw), float(mean), float(stdDev) 14 | return (raw - mean) / stdDev 15 | 16 | 17 | def zscoreNormalizeValue(value, distribution): 18 | """ 19 | Appropriate to use when the context (the distribution) varies. 20 | """ 21 | mean = sum(distribution) / len(distribution) 22 | 23 | tmpList = [(tmpVal - mean) ** 2 for tmpVal in distribution] 24 | standardDeviation = math.sqrt(sum(tmpList) / len(tmpList)) 25 | 26 | return _zscoreNormalize(value, mean, standardDeviation) 27 | 28 | 29 | def syntagmaticNormalization(sampleIndexList, dataList, contextList): 30 | """ 31 | Normalizes using local context (before and after the occurrence) 32 | 33 | 'sampleIndexList' contains the list of indices for values in 'contextList' 34 | that should be normalized. 35 | 'contextList' provides the indices for all words that should be 36 | considered (including the present one) 37 | e.g. for +/- 2 words [-2, -1, 0, 1, 2] 38 | 'featureExtractionFunc' provides the function that extracts the 39 | relevant feature to be normalized from the words (could be 40 | a word or syllable level feature) 41 | """ 42 | 43 | dataList = [float(value) for value in dataList] 44 | 45 | def doSkipValue(value): 46 | return value == 0 or value == "None" 47 | 48 | # Get the files associated with this speaker 49 | # - be patient, running retrieveStressIndex() takes some time the 50 | # first time 51 | # fnList = fetchFNsForSpeaker(speakerID) 52 | 53 | negativeContextList = [contextI for contextI in contextList if contextI < 0] 54 | negativeContextList.sort(reverse=True) 55 | positiveContextList = [contextI for contextI in contextList if contextI > 0] 56 | positiveContextList.sort() 57 | 58 | # Create index 59 | outputList = [] 60 | for i in sampleIndexList: 61 | value = dataList[i] 62 | 63 | # A value of 0.0 generally is not meaningful 64 | # (TODO: is there anywhere where this is not the case?) 
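        # Each kept value is z-scored against its local context further
        # below: z = (value - mean(context)) / stddev(context), via
        # zscoreNormalizeValue().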
65 |         if i == -1 or doSkipValue(value):
66 |             outputList.append(0)
67 |             continue
68 | 
69 |         contextValueList = [
70 |             dataList[i],
71 |         ]
72 | 
73 |         for incr, tmpContextList in [
74 |             (-1, negativeContextList[:]),
75 |             (1, positiveContextList[:]),
76 |         ]:
77 |             prevContextValue = dataList[i]
78 |             for contextI in tmpContextList:
79 |                 try:
80 |                     assert i + contextI >= 0
81 |                     subValue = dataList[i + contextI]
82 | 
83 |                 # If we've gone outside the bounds of the file, just
84 |                 # repeat the last known good value
85 |                 except (IndexError, AssertionError):
86 |                     contextValueList.append(prevContextValue)
87 |                     continue
88 | 
89 |                 # Don't count words with meaningless values as part
90 |                 # of the context
91 |                 if doSkipValue(subValue):
92 |                     tmpContextList.append(tmpContextList[-1] + incr)
93 |                     continue
94 | 
95 |                 prevContextValue = subValue
96 |                 contextValueList.append(subValue)
97 | 
98 |         normalizedValue = zscoreNormalizeValue(value, contextValueList)
99 |         outputList.append(normalizedValue)
100 | 
101 |     return outputList
102 | 
--------------------------------------------------------------------------------
/pyacoustics/utilities/sequences.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Jun 5, 2013
3 | 
4 | @author: timmahrt
5 | """
6 | import math
7 | 
8 | from pyacoustics.utilities import my_math
9 | 
10 | 
11 | DO_SAMPLE_GATED = 1  # Each subsequence overlaps by (n-1)/2
12 | DO_SAMPLE_EXCLUSIVE = 2  # No index appears in two subsequences
13 | DO_SAMPLE_ALL = 3  # Each index acts as the control point once
14 | 
15 | 
16 | def compressList(targetList):
17 |     """
18 |     Compresses a list into runs of the form [value, startIndex, endIndex]
19 | 
20 |     e.g. targetList = [1, 1, 1, 1, 2, 2, 1, 1, 3]
21 |     >> [[1, 0, 4], [2, 4, 6], [1, 6, 8], [3, 8, 9]]
22 |     """
23 | 
24 |     currentValue = targetList[0]
25 |     startIndex = 0
26 |     i = 0
27 | 
28 |     outputList = []
29 |     while i < len(targetList):
30 |         if targetList[i] == currentValue:
31 |             i += 1
32 |             continue
33 | 
34 |         outputList.append([currentValue, startIndex, i])
35 | 
36 |         currentValue = targetList[i]
37 |         startIndex = i
38 |         i += 1
39 | 
40 |     if len(outputList) == 0 or outputList[-1][0] != currentValue:
41 |         outputList.append([currentValue, startIndex, i])
42 | 
43 | 
44 | 
45 |     return outputList
46 | 
47 | 
48 | def compressedListTransform(compressedList, timeStep, timeThreshold=None):
49 |     """
50 |     Isolates the unique values in compressedList and converts them to time
51 | 
52 |     timeThreshold can be set to ignore values that are not long enough, adding
53 |     their content to whatever came before (prevents fragmenting data too much).
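
    e.g. with the compressList() output [['a', 0, 4], ['b', 4, 6]] and a
    timeStep of 0.01 seconds per sample (and no threshold):
    >> {'a': [[0.0, 0.04, '0']], 'b': [[0.04, 0.06, '0']]}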
54 | """ 55 | 56 | returnDict = {} 57 | countDict = {} 58 | lastGoodLabel = None 59 | for label, start, end in compressedList: 60 | countDict.setdefault(label, 0) 61 | returnDict.setdefault(label, []) 62 | 63 | startTime = start * timeStep 64 | endTime = end * timeStep 65 | 66 | # Merge this entry with the previous one 67 | # if it is too short (noise tolerance) 68 | tmpDuration = (end - start) * timeStep 69 | if timeThreshold is not None and tmpDuration < timeThreshold: 70 | # If the very first entry is less than 0.3 seconds long 71 | if lastGoodLabel is not None and returnDict[lastGoodLabel] != []: 72 | returnDict[lastGoodLabel][-1][1] = endTime 73 | continue 74 | 75 | # If the previous label and this one are the same, merge entries 76 | if label == lastGoodLabel: 77 | returnDict[label][-1][1] = endTime 78 | 79 | # Otherwise, create a new entry 80 | else: 81 | returnDict[label].append([startTime, endTime, str(countDict[label])]) 82 | countDict[label] += 1 83 | lastGoodLabel = label 84 | 85 | return returnDict 86 | 87 | 88 | def sampleMiddle(dataList, i, chunkSize): 89 | """ 90 | The control point lies in the center (i - 1 ) / 2.0 91 | """ 92 | assert (chunkSize % 2) == 1 # i must be an odd number 93 | halfChunk = int(math.floor(chunkSize / 2.0)) 94 | 95 | subList = [] 96 | indexList = [] 97 | start = i - halfChunk if i - halfChunk >= 0 else 0 98 | end = i + halfChunk if i + halfChunk < len(dataList) else len(dataList) - 1 99 | 100 | # Handling underflow 101 | if i - halfChunk < 0: 102 | subList += [ 103 | dataList[0], 104 | ] * abs(i - halfChunk) 105 | indexList += [ 106 | 0, 107 | ] * abs(i - halfChunk) 108 | 109 | # The normal range 110 | mainBody = [dataList[j] for j in range(start, end + 1)] 111 | uniqueChunkLen = len(mainBody) 112 | subList.extend(mainBody) 113 | indexList.extend([j for j in range(start, end + 1)]) 114 | 115 | # Handling overflow 116 | if i + halfChunk >= len(dataList): 117 | subList += [ 118 | dataList[len(dataList) - 1], 119 | ] * ((1 + i + halfChunk) - len(dataList)) 120 | indexList.extend( 121 | [ 122 | len(dataList) - 1, 123 | ] 124 | * ((1 + i + halfChunk - len(dataList))) 125 | ) 126 | 127 | return subList, indexList, uniqueChunkLen 128 | 129 | 130 | def sampleLeft(dataList, i, chunkSize): 131 | """ 132 | The control point lies on the left edge (i = 0) 133 | """ 134 | subList = [] 135 | indexList = [] 136 | start = i 137 | end = i + chunkSize if i + chunkSize < len(dataList) else len(dataList) 138 | 139 | # The normal range 140 | mainBody = [dataList[j] for j in range(start, end)] 141 | uniqueChunkLen = len(mainBody) 142 | subList.extend(mainBody) 143 | indexList.extend([j for j in range(start, end)]) 144 | 145 | # Handling overflow 146 | if i + chunkSize >= len(dataList): 147 | subList += [ 148 | dataList[len(dataList) - 1], 149 | ] * ((1 + i + chunkSize) - len(dataList)) 150 | indexList += [ 151 | len(dataList) - 1, 152 | ] * ((1 + i + chunkSize) - len(dataList)) 153 | 154 | return subList, indexList, uniqueChunkLen 155 | 156 | 157 | def sampleRight(dataList, i, chunkSize): 158 | """ 159 | The control point lies on the right edge (i = -1) 160 | """ 161 | subList = [] 162 | indexList = [] 163 | start = 1 + i - chunkSize if 1 + i - chunkSize >= 0 else 0 164 | end = i + 1 if i < len(dataList) else len(dataList) 165 | 166 | # Handling underflow 167 | # print("blah", abs(i - chunkSize), start, end) 168 | if i - chunkSize < 0: 169 | subList += [ 170 | dataList[0], 171 | ] * (abs(i - chunkSize + 1)) 172 | indexList += [ 173 | 0, 174 | ] * (abs(i - chunkSize + 1)) 
195 | def subsequenceGenerator(dataList, chunkSize, sampleFunc, stepSizeFlag):
196 |     """
197 |     Can iteratively generate subsequences in a variety of fashions
198 | 
199 |     chunkSize - the size of each chunk
200 |     sampleFunc - e.g. sampleMiddle(), sampleLeft(), sampleRight(), determines
201 |                  the 'controlPoint'
202 |     stepSizeFlag - determines the distance between starting points (one of the DO_SAMPLE_* flags)
203 | 
204 |     Regardless of the parameters, all values will appear in one of the
205 |     subsequences, including the endpoints.  Each subsequence is the same
206 |     length--if necessary, values are repeated on the tail ends of the
207 |     list
208 |     """
209 | 
210 |     if stepSizeFlag == DO_SAMPLE_EXCLUSIVE:
211 |         stepSize = chunkSize
212 |     elif stepSizeFlag == DO_SAMPLE_GATED:
213 |         stepSize = int(math.floor(chunkSize / 2.0))
214 |     elif stepSizeFlag == DO_SAMPLE_ALL:
215 |         stepSize = 1
216 | 
217 |     controlPoint = 0
218 |     finalIndex = 0
219 |     doneIterating = False
220 |     while not doneIterating:
221 |         subSequence, subSequenceIndices, sampledLen = sampleFunc(
222 |             dataList, controlPoint, chunkSize
223 |         )
224 | 
225 |         finalIndex = subSequenceIndices[-1]
226 |         isEndpointLastValue = finalIndex >= (len(dataList) - 1)
227 |         isControlPointLastValue = controlPoint >= (len(dataList) - 1)
228 | 
229 |         # Regardless of what the control point was, end when the last index
230 |         # in the subset matches the length of the data list
231 |         if stepSizeFlag == DO_SAMPLE_EXCLUSIVE:
232 |             doneIterating = isEndpointLastValue
233 | 
234 |         # When the control point index reaches the end of the data list
235 |         # (i.e., all values have been represented in some list, end)
236 |         else:
237 |             doneIterating = isControlPointLastValue
238 | 
239 |         controlPoint += stepSize
240 | 
241 |         if stepSizeFlag == DO_SAMPLE_GATED:
242 |             if sampleFunc == sampleMiddle:
243 |                 region = subSequenceIndices[int((chunkSize - 1) / 2.0) : -1]
244 |             elif sampleFunc == sampleLeft:
245 |                 region = subSequenceIndices[: int((chunkSize - 1) / 2.0)]
246 |             elif sampleFunc == sampleRight:
247 |                 region = subSequenceIndices[int((chunkSize - 1) / 2.0) + 1 :]
248 | 
249 |             # The number of unique indices in the gated region (padding repeats collapse)
250 |             sampledLen = len(set(region))
251 | 
252 |             if doneIterating and sampleFunc != sampleRight:
253 |                 sampledLen = 0
254 | 
255 |         yield subSequence, subSequenceIndices, sampledLen
256 | 
257 | 
258 | def interp(start, stop, n):  # yields n evenly spaced values from start to stop, inclusive (n must be >= 2)
259 |     for i in range(n):
260 |         yield start + i * (stop - start) / float(n - 1)
261 | 
262 | 
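# An illustrative sketch of the generator above (not part of the source
# file; it assumes the padding fix to sampleLeft so every chunk has exactly
# chunkSize values). DO_SAMPLE_EXCLUSIVE advances the control point by
# chunkSize each step, so every value appears in exactly one subsequence,
# with the final chunk padded when the list runs out.
#
#     data = [0, 1, 2, 3, 4]
#     for chunk, indices, sampledLen in subsequenceGenerator(
#         data, 2, sampleLeft, DO_SAMPLE_EXCLUSIVE
#     ):
#         print(chunk)  # [0, 1] then [2, 3] then [4, 4]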
263 | # Adapted this from online - for getting a set of evenly spaced intervals
264 | # from a list
265 | # http://stackoverflow.com/questions/10084436/generating-evenly-distributed-
266 | # multiples-samples-within-a-range
267 | def getEvenlySpacedSteps(start, end, n):
268 |     assert end + 1 - start >= n
269 | 
270 |     # The usual case
271 |     if n != 1:
272 |         step = (end - start) / float(n - 1)
273 |         retList = [int(round(start + x * step)) for x in range(n)]
274 | 
275 |     # If someone only wants 1 sample, just take the middle sample
276 |     else:
277 |         midpoint = start + (end - start) / 2.0
278 |         retList = [
279 |             int(round(midpoint)),
280 |         ]
281 | 
282 |     return retList
283 | 
284 | 
285 | def binDistribution(distList, numBins, minV=None, maxV=None):
286 |     """
287 |     Places all data into the closest of n evenly spaced bins
288 |     """
289 | 
290 |     if minV is None:
291 |         minV = min(distList)
292 | 
293 |     if maxV is None:
294 |         maxV = max(distList)
295 | 
296 |     binValueArray = my_math.linspace(minV, maxV, numBins)
297 | 
298 |     binnedValueList = []
299 |     for value in distList:
300 |         diffList = list(abs(binValueArray - value))
301 |         smallestDiff = min(diffList)
302 |         binIndex = diffList.index(smallestDiff)
303 | 
304 |         binnedValueList.append(binValueArray[binIndex])
305 | 
306 |     return binnedValueList
307 | 
308 | 
309 | def findLongestSublist(listOfLists):
310 |     longestList = []
311 |     longestIndex = None
312 |     for i, lst in enumerate(listOfLists):
313 |         if len(lst) > len(longestList):
314 |             longestIndex, longestList = i, lst
315 | 
316 |     return longestIndex, longestList
317 | 
318 | 
319 | def invertIntervalList(entryList, minValue=0, maxValue=None):
320 |     """
321 |     Given a list of ordinal events, inverts the start and end positions
322 | 
323 |     e.g. input [(5, 6), (10, 13), (14, 16)]
324 |          output [(0, 5), (6, 10), (13, 14)]
325 |     """
326 |     if entryList == []:
327 |         return []
328 | 
329 |     newEntryList = []
330 |     i = 0
331 | 
332 |     # Add possible initial interval
333 |     if minValue is not None:
334 |         if entryList[0][0] > minValue:
335 |             newEntryList.append((minValue, entryList[0][0]))
336 | 
337 |     while i + 1 < len(entryList):
338 |         newEntryList.append((entryList[i][1], entryList[i + 1][0]))
339 |         i += 1
340 | 
341 |     # Add possible trailing interval
342 |     if maxValue is not None:
343 |         if entryList[i][1] < maxValue:
344 |             newEntryList.append((entryList[i][1], maxValue))
345 | 
346 |     return newEntryList
347 | 
--------------------------------------------------------------------------------
/pyacoustics/utilities/statistics.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Apr 2, 2015
3 | 
4 | @author: tmahrt
5 | """
6 | 
7 | import math
8 | 
9 | 
10 | def medianFilter(dist, window, useEdgePadding):
11 |     offset = int(math.floor(window / 2.0))
12 |     length = len(dist)
13 | 
14 |     returnList = []
15 |     for x in range(length):
16 |         dataToFilter = []
17 |         # If using edge padding or if the full window fits inside the list
18 |         if useEdgePadding or (0 <= x - offset and x + offset < length):
19 |             preContext = []
20 |             currentContext = [
21 |                 dist[x],
22 |             ]
23 |             postContext = []
24 | 
25 |             lastKnownLargeIndex = 0
26 |             for y in range(1, offset + 1):  # offsets y are 1-based
27 |                 if x + y >= length:
28 |                     if lastKnownLargeIndex == 0:
29 |                         largeIndexValue = x
30 |                     else:
31 |                         largeIndexValue = lastKnownLargeIndex
32 |                 else:
33 |                     largeIndexValue = x + y
34 |                     lastKnownLargeIndex = x + y
35 | 
36 |                 postContext.append(dist[largeIndexValue])
37 | 
38 |                 if x - y < 0:
39 |                     smallIndexValue = 0
40 |                 else:
41 |                     smallIndexValue = x - y
42 | 
43 |                 preContext.insert(0, dist[smallIndexValue])
44 | 
45 |             dataToFilter = preContext + currentContext + postContext
46 |             value = getMedian(dataToFilter)
47 |         else:
48 |             value = dist[x]
49 | 
50 |         returnList.append(value)
51 | 
52 |     return returnList
53 | 
54 | 
55 | def getMedian(dist):
56 |     assert len(dist) > 0
57 | 
58 |     dist = sorted(dist)
59 |     length = len(dist)
60 | 
61 |     halfPoint = int(length / 2.0)
62 | 
63 |     if length % 2 == 0:
64 |         median = (dist[halfPoint - 1] + dist[halfPoint]) / 2.0
65 |     else:
66 |         median = dist[halfPoint]
67 | 
68 |     return median
69 | 
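# Illustrative usage sketch (an editorial addition, not part of the
# original file), using the same values as tests/test_statistics.py: a
# window-3 median filter smooths isolated spikes, and edge padding keeps
# the output the same length as the input.
if __name__ == "__main__":
    data = [5, 1, 10, 13, 3, 17, 9, 17]
    print(medianFilter(data, 3, useEdgePadding=True))  # [5, 5, 10, 10, 13, 9, 17, 17]
    print(getMedian(data))  # 9.5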
--------------------------------------------------------------------------------
/pyacoustics/utilities/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Oct 11, 2012
3 | 
4 | @author: timmahrt
5 | """
6 | 
7 | import os
8 | from os.path import join
9 | 
10 | import functools
11 | import itertools
12 | import shutil
13 | import io
14 | import inspect
15 | 
16 | 
17 | pyAcousticsPath = os.path.split(inspect.getfile(inspect.currentframe()))[0]
18 | # Get out of the 'utilities' folder
19 | pyAcousticsPath = os.path.split(pyAcousticsPath)[0]
20 | scriptsPath = join(pyAcousticsPath, "praatScripts")
21 | 
22 | 
23 | def _getMatchFunc(pattern):
24 |     """
25 |     An unsophisticated pattern matching function
26 |     """
27 | 
28 |     # '#' marks a word boundary; if there were more than one, we would need
29 |     # to do something special to make sure we're not misrepresenting them
30 |     assert pattern.count("#") < 2
31 | 
32 |     def startsWith(subStr, fullStr):
33 |         return fullStr[: len(subStr)] == subStr
34 | 
35 |     def endsWith(subStr, fullStr):
36 |         return fullStr[-1 * len(subStr) :] == subStr
37 | 
38 |     def inStr(subStr, fullStr):
39 |         return subStr in fullStr
40 | 
41 |     # Selection of the correct function
42 |     if pattern[0] == "#":
43 |         pattern = pattern[1:]
44 |         cmpFunc = startsWith
45 | 
46 |     elif pattern[-1] == "#":
47 |         pattern = pattern[:-1]
48 |         cmpFunc = endsWith
49 | 
50 |     else:
51 |         cmpFunc = inStr
52 | 
53 |     return functools.partial(cmpFunc, pattern)
54 | 
55 | 
56 | def findFiles(
57 |     path,
58 |     filterPaths=False,
59 |     filterExt=None,
60 |     filterPattern=None,
61 |     skipIfNameInList=None,
62 |     stripExt=False,
63 | ):
64 |     fnList = os.listdir(path)
65 | 
66 |     if filterPaths is True:
67 |         fnList = [
68 |             folderName
69 |             for folderName in fnList
70 |             if os.path.isdir(os.path.join(path, folderName))
71 |         ]
72 | 
73 |     if filterExt is not None:
74 |         splitFNList = [
75 |             [
76 |                 fn,
77 |             ]
78 |             + list(os.path.splitext(fn))
79 |             for fn in fnList
80 |         ]
81 |         fnList = [fn for fn, name, ext in splitFNList if ext == filterExt]
82 | 
83 |     if filterPattern is not None:
84 |         splitFNList = [
85 |             [
86 |                 fn,
87 |             ]
88 |             + list(os.path.splitext(fn))
89 |             for fn in fnList
90 |         ]
91 |         matchFunc = _getMatchFunc(filterPattern)
92 |         fnList = [fn for fn, name, ext in splitFNList if matchFunc(name)]
93 | 
94 |     if skipIfNameInList is not None:
95 |         targetNameList = [os.path.splitext(fn)[0] for fn in skipIfNameInList]
96 |         fnList = [fn for fn in fnList if os.path.splitext(fn)[0] not in targetNameList]
97 | 
98 |     if stripExt is True:
99 |         fnList = [os.path.splitext(fn)[0] for fn in fnList]
100 | 
101 |     fnList.sort()
102 |     return fnList
103 | 
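# An illustrative sketch of findFiles (not part of the source file; `path`
# is assumed to name a directory of audio files): the '#' in filterPattern
# anchors the match to the start or end of the filename (extension
# excluded), while a bare substring matches anywhere.
#
#     findFiles(path, filterExt=".wav")                      # only .wav files
#     findFiles(path, filterPattern="#intro")                # names starting with 'intro'
#     findFiles(path, filterPattern="data#", stripExt=True)  # names ending in 'data', extensions stripped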
114 | """ 115 | 116 | # Load CSV file 117 | with io.open(join(path, fn), "r", encoding=encoding) as fd: 118 | featureList = fd.read().splitlines() 119 | featureList = [row.split(",") for row in featureList] 120 | 121 | if valueIndex is not None: 122 | featureList = [row[valueIndex] for row in featureList] 123 | 124 | return featureList 125 | 126 | 127 | def changeFileType(path, fromExt, toExt): 128 | if fromExt[0] != ".": 129 | fromExt = "." + fromExt 130 | if toExt[0] != ".": 131 | toExt = "." + toExt 132 | 133 | for fn in os.listdir(path): 134 | name, ext = os.path.splitext(fn) 135 | if ext == fromExt: 136 | shutil.move(join(path, fn), join(path, name + toExt)) 137 | 138 | 139 | def makeDir(path): 140 | if not os.path.exists(path): 141 | os.mkdir(path) 142 | 143 | 144 | def extractLines(path, matchStr, outputDir="output"): 145 | outputPath = join(path, outputDir) 146 | makeDir(outputPath) 147 | 148 | for fn in findFiles(path, filterExt=".csv"): 149 | with io.open(join(path, fn), "r", encoding="utf-8") as fd: 150 | data = fd.read() 151 | dataList = data.split("\n") 152 | 153 | dataList = [line for line in dataList if matchStr in line] 154 | 155 | with io.open(join(outputPath, fn), "w", encoding="utf-8") as fd: 156 | fd.write("\n".join(dataList)) 157 | 158 | 159 | def cat(fn1, fn2, outputFN): 160 | with io.open(fn1, "r", encoding="utf-8") as fd: 161 | txt1 = fd.read() 162 | with io.open(fn2, "r", encoding="utf-8") as fd: 163 | txt2 = fd.read() 164 | 165 | with io.open(outputFN, "w", encoding="utf-8") as fd: 166 | fd.write(txt1 + txt2) 167 | 168 | 169 | def catAll(path, ext, ensureNewline=False): 170 | outputPath = join(path, "cat_output") 171 | makeDir(outputPath) 172 | 173 | outputList = [] 174 | for fn in findFiles(path, filterExt=ext): 175 | with io.open(join(path, fn), "r", encoding="utf-8") as fd: 176 | data = fd.read() 177 | 178 | if ensureNewline and data[-1] != "\n": 179 | data += "\n" 180 | 181 | outputList.append(data) 182 | 183 | outputTxt = "".join(outputList) 184 | outputFN = join(outputPath, "catFiles" + ext) 185 | with io.open(outputFN, "w", encoding="utf-8") as fd: 186 | fd.write(outputTxt) 187 | 188 | 189 | def whatever(path): 190 | outputList = [] 191 | for fn in findFiles(path, filterExt=".txt"): 192 | outputList.extend( 193 | [ 194 | fn, 195 | ] 196 | * 30 197 | ) 198 | 199 | for fn in outputList: 200 | print(fn) 201 | 202 | 203 | def divide(numerator, denominator, zeroValue): 204 | if denominator == 0: 205 | retValue = zeroValue 206 | else: 207 | retValue = numerator / float(denominator) 208 | 209 | return retValue 210 | 211 | 212 | def safeZip(listOfLists, enforceLength): 213 | if enforceLength is True: 214 | length = len(listOfLists[0]) 215 | assert all([length == len(subList) for subList in listOfLists]) 216 | 217 | return itertools.izip_longest(*listOfLists) 218 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | Created on Oct 15, 2014 5 | 6 | @author: tmahrt 7 | """ 8 | from setuptools import setup 9 | import io 10 | 11 | setup( 12 | name="pyacoustics", 13 | python_requires=">3.6.0", 14 | version="2.0.0", 15 | author="Tim Mahrt", 16 | author_email="timmahrt@gmail.com", 17 | url="https://github.com/timmahrt/pyAcoustics", 18 | package_dir={"pyacoustics": "pyacoustics"}, 19 | packages=[ 20 | "pyacoustics", 21 | "pyacoustics.intensity_and_pitch", 22 | "pyacoustics.signals", 23 | 
"pyacoustics.speech_detection", 24 | "pyacoustics.speech_rate", 25 | "pyacoustics.text", 26 | "pyacoustics.textgrids", 27 | "pyacoustics.utilities", 28 | ], 29 | package_data={ 30 | "pyacoustics": [ 31 | "matlabScripts/detect_syllable_nuclei.m", 32 | ] 33 | }, 34 | license="LICENSE", 35 | install_requires=[ 36 | "praatio ~= 6.0", 37 | "typing_extensions", 38 | ], 39 | description="A collection of python scripts for extracting and analyzing acoustics from audio files.", 40 | long_description=io.open("README.md", "r", encoding="utf-8").read(), 41 | long_description_content_type="text/markdown", 42 | ) 43 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/tests/__init__.py -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timmahrt/pyAcoustics/c778e4ada301f420a71bf9f6d4b51beccccaecde/tests/integration/__init__.py -------------------------------------------------------------------------------- /tests/integration/test_integration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Nov 21, 2021 5 | 6 | @author: tmahrt 7 | 8 | Runs integration tests 9 | 10 | The examples were all written as scripts. They weren't meant to be 11 | imported or run from other code. So here, the integration test is just 12 | importing the scripts, which causes them to execute. If the code completes 13 | with no errors, then the code is at least able to complete. 14 | 15 | Testing whether or not the code actually did what it is supposed to is 16 | another issue and will require some refactoring. 
17 | """ 18 | 19 | import unittest 20 | import os 21 | import sys 22 | from pathlib import Path 23 | 24 | _root = os.path.join(Path(__file__).parents[2], "examples") 25 | sys.path.append(_root) 26 | 27 | 28 | class TestIntegration(unittest.TestCase): 29 | """Integration tests""" 30 | 31 | def test_estimate_speech_rate(self): 32 | """Running 'add_tiers.py'""" 33 | import estimate_speech_rate 34 | 35 | def test_frequency(self): 36 | """Running 'anonymize_recording'""" 37 | import frequency 38 | 39 | def test_split_audio_on_silence(self): 40 | """Running 'calculate_duration.py'""" 41 | import split_audio_on_silence 42 | 43 | def test_split_audio_on_tone(self): 44 | """Running 'correct_misaligned_tiers.py'""" 45 | import split_audio_on_tone 46 | 47 | def setUp(self): 48 | unittest.TestCase.setUp(self) 49 | 50 | root = os.path.join(_root, "files") 51 | self.oldRoot = os.getcwd() 52 | os.chdir(_root) 53 | self.startingList = os.listdir(root) 54 | self.startingDir = os.getcwd() 55 | 56 | def tearDown(self): 57 | """Remove any files generated during the test""" 58 | # unittest.TestCase.tearDown(self) 59 | 60 | root = os.path.join(".", "files") 61 | endingList = os.listdir(root) 62 | endingDir = os.getcwd() 63 | rmList = [fn for fn in endingList if fn not in self.startingList] 64 | 65 | if self.oldRoot == root: 66 | for fn in rmList: 67 | fnFullPath = os.path.join(root, fn) 68 | if os.path.isdir(fnFullPath): 69 | os.rmdir(fnFullPath) 70 | else: 71 | os.remove(fnFullPath) 72 | 73 | os.chdir(self.oldRoot) 74 | 75 | 76 | if __name__ == "__main__": 77 | unittest.main() 78 | -------------------------------------------------------------------------------- /tests/test_sequences.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jul 3, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import unittest 8 | 9 | from pyacoustics.utilities import sequences 10 | 11 | 12 | class TestSequences(unittest.TestCase): 13 | 14 | LIST_A = [(5, 6), (10, 13), (14, 16)] 15 | LIST_B = [(0, 1), (5, 6), (10, 13), (14, 16)] 16 | 17 | def test_startsAtZero(self): 18 | invertedList = sequences.invertIntervalList(self.LIST_B) 19 | correctAnswer = [(1, 5), (6, 10), (13, 14)] 20 | self.assertEqual(invertedList, correctAnswer) 21 | 22 | def test_startsAtNonZero(self): 23 | invertedList = sequences.invertIntervalList(self.LIST_A) 24 | correctAnswer = [(0, 5), (6, 10), (13, 14)] 25 | self.assertEqual(invertedList, correctAnswer) 26 | 27 | def test_maxValue(self): 28 | invertedList = sequences.invertIntervalList(self.LIST_B, maxValue=20) 29 | correctAnswer = [(1, 5), (6, 10), (13, 14), (16, 20)] 30 | self.assertEqual(invertedList, correctAnswer) 31 | 32 | def test_minValue(self): 33 | invertedList = sequences.invertIntervalList(self.LIST_A, minValue=3) 34 | correctAnswer = [(3, 5), (6, 10), (13, 14)] 35 | self.assertEqual(invertedList, correctAnswer) 36 | 37 | def test_twiceInverted(self): 38 | invertedList = sequences.invertIntervalList(self.LIST_A, 0, 20) 39 | twiceInvList = sequences.invertIntervalList(invertedList, 0, 20) 40 | self.assertEqual(self.LIST_A, twiceInvList) 41 | -------------------------------------------------------------------------------- /tests/test_statistics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Apr 2, 2015 3 | 4 | @author: tmahrt 5 | """ 6 | 7 | import unittest 8 | 9 | from pyacoustics.utilities import statistics 10 | 11 | 12 | class TestStatistics(unittest.TestCase): 13 | 14 | MY_LIST = [5, 
14 |     MY_LIST = [5, 1, 10, 13, 3, 17, 9, 17]
15 | 
16 |     def test_evenLengthedListCorrect(self):
17 |         median = statistics.getMedian(self.MY_LIST)
18 |         self.assertEqual(median, 9.5)
19 | 
20 |     def test_oddLengthedListCorrect(self):
21 |         median = statistics.getMedian(self.MY_LIST[:-1])
22 |         self.assertEqual(median, 9)
23 | 
24 |     def test_filterOddLengthedListCorrect(self):
25 |         medianList = statistics.medianFilter(self.MY_LIST, 3, useEdgePadding=True)
26 |         correctList = [5, 5, 10, 10, 13, 9, 17, 17]
27 |         self.assertEqual(medianList, correctList)
28 | 
--------------------------------------------------------------------------------