├── .coveragerc ├── .gitignore ├── .pylintrc ├── .travis.yml ├── AUTHORS.rst ├── CHANGES.rst ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.rst ├── conftest.py ├── docs ├── Makefile ├── _static │ └── .placeholder ├── audio │ └── index.rst ├── central │ └── index.rst ├── conf.py ├── distort │ └── index.rst ├── index.rst ├── inner │ └── index.rst ├── make.bat ├── middle │ └── index.rst ├── outer │ └── index.rst ├── speech │ └── index.rst └── utils │ └── index.rst ├── examples └── .place_holder ├── optional-requirements.txt ├── pambox ├── __init__.py ├── audio.py ├── central │ ├── __init__.py │ ├── decision_metrics.py │ ├── ec.py │ └── modulation_filterbanks.py ├── distort.py ├── inner.py ├── middle.py ├── outer.py ├── speech │ ├── __init__.py │ ├── bsepsm.py │ ├── experiment.py │ ├── material.py │ ├── mrsepsm.py │ ├── sepsm.py │ └── sii.py ├── tests │ ├── data │ │ ├── dummy_ssn.wav │ │ ├── test_GammatoneFilterbank_filtering.mat │ │ ├── test_full_sepsm.csv │ │ ├── test_hilbert_env_and_lp_filtering_v1.mat │ │ ├── test_mr_sepsm_full.mat │ │ ├── test_mr_sepsm_mr_env_powers.mat │ │ ├── test_mr_sepsm_mr_snr_env_mix.mat │ │ ├── test_mr_sepsm_mr_snr_env_noise.mat │ │ ├── test_mr_sepsm_snrenv_mr_v1.mat │ │ ├── test_mr_sepsm_time_average_snr.mat │ │ ├── test_sepsm_spec_sub_0dB_kappa_0_8.mat │ │ ├── test_sii.csv │ │ ├── test_spec_sub_kappa_0.csv │ │ ├── test_spec_sub_kappa_1.csv │ │ └── test_third_octave_filtering_of_noise.csv │ ├── test_bsepsm.py │ ├── test_central.py │ ├── test_ec.py │ ├── test_experiment.py │ ├── test_inner.py │ ├── test_mrsepsm.py │ ├── test_sepsm.py │ ├── test_sii.py │ ├── test_speech_material.py │ └── test_utils.py └── utils.py ├── readthedocs-requirements.txt ├── requirements.txt ├── setup.py ├── testing_requirements.txt └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit=pambox/tests/* 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | 38 | # Rope 39 | .ropeproject/* 40 | 41 | # iPython notebook 42 | .ipynb_checkpoints/* 43 | 44 | # Unused files that I keep in the directory 45 | unused/* 46 | design/* 47 | 48 | *.DS_Store 49 | 50 | # PyCharm settings 51 | .idea 52 | 53 | .cache/ 54 | 55 | # Todo files 56 | todo.taskpaper 57 | 58 | docs/_build 59 | 60 | # Ctags file 61 | tags 62 | 63 | # Development IPython notebookts 64 | devnotebooks/ 65 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # Specify a configuration file. 4 | rcfile=.pylintrc 5 | 6 | # Python code to execute, usually for sys.path manipulation such as 7 | # pygtk.require(). 8 | #init-hook= 9 | 10 | # Profiled execution. 11 | profile=no 12 | 13 | # Add files or directories to the blacklist. They should be base names, not 14 | # paths. 15 | ignore=.git,scripts,docs,dist 16 | 17 | # Pickle collected data for later comparisons. 18 | persistent=yes 19 | 20 | # List of plugins (as comma separated values of python modules names) to load, 21 | # usually to register additional checkers. 22 | load-plugins= 23 | 24 | 25 | [MESSAGES CONTROL] 26 | 27 | # Enable the message, report, category or checker with the given id(s). 
You can 28 | # either give multiple identifier separated by comma (,) or put this option 29 | # multiple time. See also the "--disable" option for examples. 30 | #enable= 31 | 32 | # Disable the message, report, category or checker with the given id(s). You 33 | # can either give multiple identifiers separated by comma (,) or put this 34 | # option multiple times (only on the command line, not in the configuration 35 | # file where it should appear only once).You can also use "--disable=all" to 36 | # disable everything first and then reenable specific checks. For example, if 37 | # you want to run only the similarities checker, you can use "--disable=all 38 | # --enable=similarities". If you want to run only the classes checker, but have 39 | # no Warning level messages displayed, use"--disable=all --enable=classes 40 | # --disable=W" 41 | disable=maybe-no-member,pointless-string-statement,import-error,no-name-in-module,star-args,no-member 42 | 43 | 44 | [REPORTS] 45 | 46 | # Set the output format. Available formats are text, parseable, colorized, msvs 47 | # (visual studio) and html. You can also give a reporter class, eg 48 | # mypackage.mymodule.MyReporterClass. 49 | output-format=text 50 | 51 | # Put messages in a separate file for each module / package specified on the 52 | # command line instead of printing them on stdout. Reports (if any) will be 53 | # written in a file name "pylint_global.[txt|html]". 54 | files-output=no 55 | 56 | # Tells whether to display a full report or only the messages 57 | reports=yes 58 | 59 | # Python expression which should return a note less than 10 (10 is the highest 60 | # note). You have access to the variables errors warning, statement which 61 | # respectively contain the number of errors / warnings messages and the total 62 | # number of statements analyzed. This is used by the global evaluation report 63 | # (RP0004). 
64 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 65 | 66 | # Add a comment according to your evaluation note. This is used by the global 67 | # evaluation report (RP0004). 68 | comment=no 69 | 70 | # Template used to display messages. This is a python new-style format string 71 | # used to format the massage information. See doc for all details 72 | #msg-template= 73 | 74 | 75 | [BASIC] 76 | 77 | # Required attributes for module, separated by a comma 78 | required-attributes= 79 | 80 | # List of builtins function names that should not be used, separated by a comma 81 | bad-functions=map,filter,apply,input 82 | 83 | # Regular expression which should only match correct module names 84 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 85 | 86 | # Regular expression which should only match correct module level names 87 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 88 | 89 | # Regular expression which should only match correct class names 90 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 91 | 92 | # Regular expression which should only match correct function names 93 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 94 | 95 | # Regular expression which should only match correct method names 96 | method-rgx=[a-z_][a-z0-9_]{2,50}$ 97 | 98 | # Regular expression which should only match correct instance attribute names 99 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 100 | 101 | # Regular expression which should only match correct argument names 102 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 103 | 104 | # Regular expression which should only match correct variable names 105 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 106 | 107 | # Regular expression which should only match correct attribute names in class 108 | # bodies 109 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 110 | 111 | # Regular expression which should only match correct list comprehension / 112 | # generator expression variable names 113 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 114 | 115 | # Good 
variable names which should always be accepted, separated by a comma 116 | good-names=i,j,k,ex,Run,_,app,db,sandman_model,Model 117 | 118 | # Bad variable names which should always be refused, separated by a comma 119 | bad-names=foo,bar,baz,toto,tutu,tata 120 | 121 | # Regular expression which should only match function or class names that do 122 | # not require a docstring. 123 | no-docstring-rgx=__.*__ 124 | 125 | # Minimum line length for functions/classes that require docstrings, shorter 126 | # ones are exempt. 127 | docstring-min-length=-1 128 | 129 | 130 | [FORMAT] 131 | 132 | # Maximum number of characters on a single line. 133 | max-line-length=160 134 | 135 | # Regexp for a line that is allowed to be longer than the limit. 136 | ignore-long-lines=^\s*(# )??$ 137 | 138 | # Maximum number of lines in a module 139 | max-module-lines=1000 140 | 141 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 142 | # tab). 143 | indent-string=' ' 144 | 145 | 146 | [MISCELLANEOUS] 147 | 148 | # List of note tags to take in consideration, separated by a comma. 149 | notes=FIXME,XXX,TODO 150 | 151 | 152 | [SIMILARITIES] 153 | 154 | # Minimum lines number of a similarity. 155 | min-similarity-lines=4 156 | 157 | # Ignore comments when computing similarities. 158 | ignore-comments=yes 159 | 160 | # Ignore docstrings when computing similarities. 161 | ignore-docstrings=yes 162 | 163 | # Ignore imports when computing similarities. 164 | ignore-imports=no 165 | 166 | 167 | [TYPECHECK] 168 | 169 | # Tells whether missing members accessed in mixin class should be ignored. A 170 | # mixin class is detected if its name ends with "mixin" (case insensitive). 171 | ignore-mixin-members=yes 172 | 173 | # List of classes names for which member attributes should not be checked 174 | # (useful for classes with attributes dynamically set). 
175 | ignored-classes=SQLObject 176 | 177 | # When zope mode is activated, add a predefined set of Zope acquired attributes 178 | # to generated-members. 179 | zope=no 180 | 181 | # List of members which are set dynamically and missed by pylint inference 182 | # system, and so shouldn't trigger E0201 when accessed. Python regular 183 | # expressions are accepted. 184 | generated-members=REQUEST,acl_users,aq_parent 185 | 186 | 187 | [VARIABLES] 188 | 189 | # Tells whether we should check for unused import in __init__ files. 190 | init-import=no 191 | 192 | # A regular expression matching the beginning of the name of dummy variables 193 | # (i.e. not used). 194 | dummy-variables-rgx=_|dummy 195 | 196 | # List of additional names supposed to be defined in builtins. Remember that 197 | # you should avoid to define new builtins when possible. 198 | additional-builtins= 199 | 200 | 201 | [CLASSES] 202 | 203 | # List of interface methods to ignore, separated by a comma. This is used for 204 | # instance to not check methods defines in Zope's Interface base class. 205 | ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by 206 | 207 | # List of method names used to declare (i.e. assign) instance attributes. 208 | defining-attr-methods=__init__,__new__,setUp 209 | 210 | # List of valid names for the first argument in a class method. 211 | valid-classmethod-first-arg=cls 212 | 213 | # List of valid names for the first argument in a metaclass class method. 214 | valid-metaclass-classmethod-first-arg=mcs 215 | 216 | 217 | [DESIGN] 218 | 219 | # Maximum number of arguments for function / method 220 | max-args=5 221 | 222 | # Argument names that match this expression will be ignored. 
Default to name 223 | # with leading underscore 224 | ignored-argument-names=_.* 225 | 226 | # Maximum number of locals for function / method body 227 | max-locals=15 228 | 229 | # Maximum number of return / yield for function / method body 230 | max-returns=6 231 | 232 | # Maximum number of branch for function / method body 233 | max-branches=12 234 | 235 | # Maximum number of statements in function / method body 236 | max-statements=50 237 | 238 | # Maximum number of parents for a class (see R0901). 239 | max-parents=7 240 | 241 | # Maximum number of attributes for a class (see R0902). 242 | max-attributes=7 243 | 244 | # Minimum number of public methods for a class (see R0903). 245 | min-public-methods=2 246 | 247 | # Maximum number of public methods for a class (see R0904). 248 | max-public-methods=20 249 | 250 | 251 | [IMPORTS] 252 | 253 | # Deprecated modules which should not be used, separated by a comma 254 | deprecated-modules=regsub,string,TERMIOS,Bastion,rexec 255 | 256 | # Create a graph of every (i.e. internal and external) dependencies in the 257 | # given file (report RP0402 must not be disabled) 258 | import-graph= 259 | 260 | # Create a graph of external dependencies in the given file (report RP0402 must 261 | # not be disabled) 262 | ext-import-graph= 263 | 264 | # Create a graph of internal dependencies in the given file (report RP0402 must 265 | # not be disabled) 266 | int-import-graph= 267 | 268 | 269 | [EXCEPTIONS] 270 | 271 | # Exceptions that will emit a warning when being caught. Defaults to 272 | # "Exception" 273 | overgeneral-exceptions=Exception 274 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - 2.7 5 | - 3.4 6 | 7 | 8 | before_install: 9 | 10 | # Use utf8 encoding. 
Should be default, but this is insurance against 11 | # future changes 12 | - export PYTHONIOENCODING=UTF8 13 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh 14 | - chmod +x miniconda.sh 15 | - ./miniconda.sh -b 16 | - export PATH=/home/travis/miniconda2/bin:$PATH 17 | - conda update --yes conda 18 | 19 | 20 | install: 21 | - conda create --yes -n test-environment python=$TRAVIS_PYTHON_VERSION 22 | - conda update --yes conda 23 | - source activate test-environment 24 | # Useful for debugging any issues with conda 25 | - conda info -a 26 | 27 | - conda install --yes pip 28 | - conda install --yes six 29 | - conda install --yes ipython-notebook 30 | - conda install --yes numpy 31 | - conda install --yes scipy 32 | - conda install --yes pandas 33 | - conda install --yes matplotlib 34 | - conda install --yes pytest 35 | - conda list 36 | - python setup.py install 37 | 38 | script: 39 | - py.test --runslow pambox/tests 40 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | 2 | - Alexandre Chabot-Leclerc 3 | -------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | CHANGES 2 | ======= 3 | 4 | master (unreleased) 5 | 6 | API changes 7 | ----------- 8 | - Added optional `model` parameter to `Experiment.pred_to_pc` to select only 9 | certain models and model outputs for the conversion to percent correct. 10 | - The `speech.Material` class takes directly the path to the sentences and to 11 | the speech-shaped noise. This allows the user to use auto-complete. 12 | - The py:func:`~pambox.utils.int2srt` function returns `np.nan` if the SRT is 13 | not found, instead of returning None. 14 | - Renamed the SRT parameter in py:func:`~pambox.utils.int2srt` to `srt_at` instead of `srt`. 
15 | - Change the default folder for speech experiment outputs to `output`. 16 | - Add parameter to adjust levels before or after the application of the 17 | distortion in speech intelligibility experiments. The default is to apply it 18 | after the distortion. 19 | 20 | Enhancements 21 | ------------ 22 | 23 | - Possibility to run experiments in parallel using IPython.parallel. See the 24 | :py:func:`~pambox.speech.experiment.run` function. 25 | - `utils.fftfilt` now mirrors Matlab's behavior. Given coefficients `b` and 26 | signal `x`: If `x` is a matrix, the rows are filtered. If `b` is a matrix, 27 | each filter is applied to `x`. If both `b` and `x` are matrices with the same 28 | number of rows, each row of `x` is filtered with the respective row of `b`. 29 | - The `Experiment` class tries to create the output folder if it does not exist. 30 | - The speech material name is saved to the output data frame when running a 31 | speech intelligibility experiment. 32 | - Added the function py:func:`~pambox.speech.material.Material.average_level` 33 | to measure the average level of a speech material. 34 | - Added py:func:`~pambox.speech.experiment.srts_from_df` to convert 35 | intelligibility predictions to SRTs. 36 | - The py:func:`~pambox.speech.experiment.next_masker` function now takes a 37 | dictionary with all the parameters of the experiment. It does not change the 38 | default behavior of the function by default, but it allows for using that 39 | parameter if the `next_masker` function is overridden. 40 | - The names of the columns saved during a speech intelligibility experiment are 41 | defined as class parameters, rather than being hard-coded. 42 | - Add optional `ax` parameter to py:func:`~pambox.speech.experiment.plot_results`. 43 | - Function py:func:`~pambox.speech.experiment.pred_to_pc` can now convert prediction to intelligibility for a specific model. 
44 | - Possibility to force the audio file to be mono when loading a file in 45 | py:class:`pambox.speech.material.Material`. 46 | - Add py:func:`~pambox.utils.read_wav_as_float` as a convenience function to 47 | read wave files as float. 48 | 49 | Performance 50 | ----------- 51 | 52 | Bug fixes 53 | --------- 54 | 55 | - Fixed #14 in the function py:func:`~pambox.central.mod_filterbank` that made 56 | the filterbank acausal. The filterbank now produces the same time output as using 57 | Butterworth filter coefficients and the `scipy.signal.filtfilt` function. 58 | - Fix #16: the ideal observer fits the average intelligibility, across all 59 | sentences, to the reference data, rather than trying to fit all sentences at 60 | once. 61 | - Fix #17: Removed unnecessary compensation factor in the sEPSM. It 62 | compensated for the filter bandwidth when computing the bands above threshold 63 | . The tests' tolerance had to be adjusted; for the spectral subtraction case, 64 | the relative difference compared to the Matlab code is smaller than 8%. In 65 | the condition with speech-shaped noise only, the difference is smaller than 0 66 | .1%. 67 | - The py:func:`~pambox.speech.material.Material.set_level` function 68 | compensates for the reference sentence level using the correct sign. 69 | - Fix the py:func:`~pambox.utils.hilbert` definition that had been mangled in 70 | a merge. It is now the same as in `scipy.signal`. 71 | - Fix #25: py:func:`~pambox.utils.setdbspl` and py:func:`~pambox.utils.rms` 72 | now behave properly with input arrays that have more than one dimension. 
73 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | How to contribute 2 | ================= 3 | 4 | The preferred way to contribute to pambox is to fork the 5 | [main repository](http://github.com/achabotl/pambox/) on GitHub: 6 | 7 | 8 | Getting started 9 | --------------- 10 | 11 | 1. Fork the [project repository](http://github.com/achabotl/pambox): click on the 'Fork' button near the top of the page. This creates a copy of the code under your account on the GitHub server. 12 | 2. Clone this copy to your local disk: 13 | 14 | $ git clone git@github.com:YourLogin/pambox.git 15 | 16 | 17 | 18 | Making changes 19 | -------------- 20 | 21 | * Make commits of logical units. 22 | * Check for unnecessary whitespace with `git diff --check` before committing. 23 | * Make sure you have added the necessary tests for your changes. 24 | * Aim for at least 80% coverage on your code 25 | * Run `python setup.py test` to make sure your tests pass 26 | * Run `coverage run --source=pambox setup.py test` if you have the `coverage` 27 | package installed to generate coverage data 28 | * Check your coverage by running `coverage report` 29 | 30 | When you've recorded your changes in Git, then push them to GitHub with: 31 | 32 | $ git push -u origin my-feature 33 | 34 | Finally, go to the web page of your fork of the `pambox` repo, 35 | and click 'Pull request' to send your changes to the maintainers for 36 | review. This will send an email to the committers. 37 | 38 | (If any of the above seems like magic to you, then look up the 39 | [Git documentation](http://git-scm.com/documentation) on the web.) 
40 | 41 | It is recommended to check that your contribution complies with the 42 | following rules before submitting a pull request: 43 | 44 | - All public methods should have informative docstrings with sample 45 | usage presented as doctests when appropriate. 46 | 47 | - All other tests pass when everything is rebuilt from scratch. On 48 | Unix-like systems, check with (from the toplevel source folder): 49 | 50 | $ python setup.py test 51 | 52 | - When adding additional functionality, provide at least one 53 | example script in the ``examples/`` folder. Have a look at other 54 | examples for reference. 55 | 56 | You can also check for common programming errors with the following 57 | tools: 58 | 59 | - Code with good unittest coverage (at least 80%), check with: 60 | 61 | $ pip install pytest pytest-cov 62 | $ py.test --cov path/to/pambox 63 | 64 | - No pyflakes warnings, check with: 65 | 66 | $ pip install pyflakes 67 | $ pyflakes path/to/module.py 68 | 69 | - No PEP8 warnings, check with: 70 | 71 | $ pip install pep8 72 | $ pep8 path/to/module.py 73 | 74 | - AutoPEP8 can help you fix some of the easy redundant errors: 75 | 76 | $ pip install autopep8 77 | $ autopep8 path/to/pep8.py 78 | 79 | pambox follows [Pandas' conventions](https://github.com/pandas-dev/pandas/blob/7a2bcb6605bacea858ec14cfac424898deb568b3/.github/CONTRIBUTING.md#contributing-your-changes-to-pandas) for commit messages, with common prefixes. Here are their guidelines for when to use them: 80 | 81 | - ENH: Enhancement, new functionality 82 | - BUG: Bug fix 83 | - DOC: Additions/updates to documentation 84 | - TST: Additions/updates to tests 85 | - BLD: Updates to the build process/scripts 86 | - PERF: Performance improvement 87 | - CLN: Code cleanup 88 | 89 | 90 | Style 91 | ----- 92 | 93 | - Python code should follow the [PEP 8 Style Guide][pep8]. 94 | - Python docstrings should follow the [NumPy documentation format][numpydoc]. 95 | 96 | ### Imports 97 | 98 | Imports should be one per line. 
99 | Imports should be grouped into standard library, third-party, 100 | and intra-library imports. `from` import should follow "regular" `imports`. 101 | Within each group the imports should be alphabetized. 102 | Here's an example: 103 | 104 | ```python 105 | import sys 106 | from glob import glob 107 | 108 | import numpy as np 109 | 110 | from pambox.utils import setdbspl 111 | ``` 112 | 113 | Imports of scientific Python libraries should follow these conventions: 114 | 115 | ```python 116 | import matplotlib.pyplot as plt 117 | import numpy as np 118 | import pandas as pd 119 | import scipy as sp 120 | ``` 121 | 122 | 123 | Thanks! 124 | 125 | [pep8]: http://legacy.python.org/dev/peps/pep-0008/ 126 | [numpydoc]: https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt 127 | 128 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Alexandre Chabot-Leclerc 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of the Technical University of Denmark nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include AUTHORS.rst 3 | include CHANGES.md 4 | include CONTRIBUTING.md 5 | include LICENSE.txt 6 | include README.rst 7 | # All source files 8 | recursive-include pambox * 9 | # All docs 10 | recursive-include examples *.py *.ipnb 11 | recursive-include docs * 12 | include tox.ini 13 | # Exclude what we don't want 14 | prune docs/_build 15 | prune pambox/tests 16 | prune */__pycache__ 17 | global-exclude *.pyc *~ *.bak *.swp *.pyo 18 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: docs release clean build html 2 | 3 | clean: 4 | rm -rf ${HOME}/pambox_test_env htmlcov 5 | 6 | build: clean 7 | export PATH=~/miniconda/bin:${PATH} 8 | conda create -p ${HOME}/pambox_test_env --yes --file requirements.txt pip \ 9 | && source activate pambox_test_env \ 10 | && python setup.py install 11 | 12 | test: clean build 13 | export 
PATH=~/miniconda/bin:${PATH} 14 | source activate pambox_test_env \ 15 | && conda install --yes --file testing_requirements.txt \ 16 | && coverage run --source=pambox setup.py test \ 17 | && coverage html \ 18 | && coverage report 19 | 20 | docs: html 21 | cd docs; make html 22 | 23 | release: test docs 24 | vim pambox/__init__.py 25 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Python Auditory Modeling Toolbox 2 | ================================ 3 | 4 | .. image:: https://travis-ci.org/achabotl/pambox.svg?branch=develop 5 | :target: https://travis-ci.org/achabotl/pambox 6 | .. image:: https://readthedocs.org/projects/pambox/badge/?version=latest 7 | :target: http://pambox.readthedocs.io/en/latest/?badge=latest 8 | :alt: Documentation Status 9 | 10 | pambox is a Python toolbox to facilitate the development of auditory 11 | models, with a focus on speech intelligibility prediction models. 12 | 13 | The project is maintained by `@AlexChabotL `_. 14 | 15 | pambox provides a consistent API for speech intelligibility models, 16 | inspired by `Scikit-learn `_, to facilitate 17 | comparisons across models. 18 | 19 | Links: 20 | ~~~~~~ 21 | 22 | - Official source code repo: https://github.com/achabotl/pambox 23 | - HTML documentations: http://pambox.readthedocs.org 24 | - Issue tracker: https://github.com/achabotl/pambox/issues 25 | - Mailing list: python-pambox@googlegroups.com 26 | - Mailing list archive: https://groups.google.com/d/forum/python-pambox 27 | 28 | Dependencies 29 | ------------ 30 | 31 | pambox is tested to work under Python 2.7 and Python 3.4 (thanks to 32 | ``six``). Only Mac OS X (10.9) has been tested thoroughly. 33 | 34 | The main dependencies are : 35 | 36 | - `Numpy `_ >= 1.8.0, 37 | - `Scipy `_ >=0.14.0, 38 | - `Pandas `_ >=0.14.1, 39 | - `six `_ >=1.7.2 (to have a single 40 | codebase for Python 2 and Python 3). 
41 | - `ipython-notebook `_ >= 2.3.1 (for parallel experiments) 42 | 43 | Lower versions of these packages are likely to work as well but have not been 44 | thoroughly tested. 45 | 46 | `pyaudio `_ is required if you 47 | want to use the ``audio`` module. 48 | 49 | For running tests, you will need `pytest `_ and `pytest-cov `_. 50 | 51 | Install 52 | ------- 53 | 54 | Right now, `pambox` is only available through GitHub. It should be available 55 | via `pip` soon. To install pambox from source:: 56 | 57 | git clone https://github.com/achabotl/pambox.git 58 | cd pambox 59 | python setup.py install 60 | 61 | If you need more details, see the 62 | [Installation](https://github.com/achabotl/pambox/wiki/Installation) page on 63 | the wiki. 64 | 65 | 66 | Contributing 67 | ------------ 68 | 69 | You can check out the latest source and install it for development with: 70 | 71 | :: 72 | 73 | git clone https://github.com/achabotl/pambox.git 74 | cd pambox 75 | python setup.py develop 76 | 77 | To run tests (you will need `pytest`), from the root pambox folder, type: 78 | 79 | :: 80 | 81 | python setup.py test 82 | 83 | License 84 | ------- 85 | 86 | pambox is licensed under the New BSD License (3-clause BSD license). 
87 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pytest 3 | 4 | 5 | def pytest_addoption(parser): 6 | parser.addoption("--runslow", action="store_true", 7 | help="run slow tests") 8 | 9 | def pytest_runtest_setup(item): 10 | if 'slow' in item.keywords and not item.config.getoption("--runslow"): 11 | pytest.skip("need --runslow option to run") 12 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pambox.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pambox.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pambox" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pambox" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 
154 | -------------------------------------------------------------------------------- /docs/_static/.placeholder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/achabotl/pambox/be1af869538e9e1781f990a9036856cb1153b58d/docs/_static/.placeholder -------------------------------------------------------------------------------- /docs/audio/index.rst: -------------------------------------------------------------------------------- 1 | Audio 2 | ===== 3 | 4 | The :mod:`~pambox.audio` module provides a single function, 5 | :py:func:`~pambox.audio.play`. By default, the output is scaled to 6 | prevent clipping and the sampling frequency is 44.1 KHz. Here a simple 7 | example where we play some white noise created with NumPy:: 8 | 9 | from pambox import audio 10 | import numpy as np 11 | audio.play(np.random.randn(10000)) 12 | 13 | To play back the signal without normalization, simply set ``normalize`` to 14 | ``False``. Be careful here! `It might get loud! `_):: 15 | 16 | audio.play(np.random.randn(10000), normalize=False) 17 | 18 | 19 | API 20 | --- 21 | 22 | .. automodule:: pambox.audio 23 | :members: 24 | -------------------------------------------------------------------------------- /docs/central/index.rst: -------------------------------------------------------------------------------- 1 | Central auditory processing 2 | =========================== 3 | 4 | 5 | The :mod:`~pambox.central` module regroups what is *not* considered to be part of 6 | the outer, middle, or inner ear. It's a rather broad concept. 7 | 8 | It contains: 9 | 10 | - An Equalization--Cancellation (EC) stage, in :py:class:`~pambox.central.EC`. 11 | - An implementation of the EPSM modulation filterbank in :py:class:`~.pambox.central.EPSMModulationFilterbank`. 12 | - An Ideal Observer, :py:class:`~pambox.central.IdealObs`, as used in the 13 | :py:class:`~pambox.speech.Sepsm` model. 14 | 15 | 16 | API 17 | --- 18 | 19 | .. 
automodule:: pambox.central 20 | :members: 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # pambox documentation build configuration file, created by 4 | # sphinx-quickstart on Wed Jan 22 17:15:54 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | 22 | import mock 23 | 24 | MOCK_MODULES = [ 25 | 'IPython', 26 | 'numpy', 27 | 'numpy.fft', 28 | 'pyaudio', 29 | 'scipy', 30 | 'scipy.stats', 31 | 'scipy.optimize', 32 | 'scipy.fftpack', 33 | 'scipy.io', 34 | 'scipy.signal', 35 | 'matplotlib', 36 | 'mpl_toolkits', 37 | 'mpl_toolkits.axes_grid1', 38 | 'scipy.io.wavfile', 39 | 'scipy.special', 40 | 'matplotlib.pyplot', 41 | 'pandas', 42 | ] 43 | for mod_name in MOCK_MODULES: 44 | sys.modules[mod_name] = mock.Mock() 45 | 46 | 47 | # -- General configuration ----------------------------------------------------- 48 | 49 | # If your documentation needs a minimal Sphinx version, state it here. 50 | #needs_sphinx = '1.0' 51 | 52 | # Add any Sphinx extension module names here, as strings. They can be extensions 53 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
54 | extensions = [ 55 | 'sphinx.ext.autodoc', 56 | 'sphinx.ext.mathjax', 57 | 'sphinx.ext.viewcode', 58 | 'numpydoc', 59 | 'sphinx.ext.autosummary' 60 | ] 61 | 62 | # Prevent "toctree contains reference to nonexisting document" warnings 63 | numpydoc_show_class_members = False 64 | numpydoc_class_members_toctree = True 65 | 66 | # Add any paths that contain templates here, relative to this directory. 67 | templates_path = ['_templates'] 68 | 69 | # The suffix of source filenames. 70 | source_suffix = '.rst' 71 | 72 | # The encoding of source files. 73 | #source_encoding = 'utf-8-sig' 74 | 75 | # The master toctree document. 76 | master_doc = 'index' 77 | 78 | # General information about the project. 79 | project = u'pambox' 80 | copyright = u'2014-2016, Alexandre Chabot-Leclerc' 81 | 82 | # The version info for the project you're documenting, acts as replacement for 83 | # |version| and |release|, also used in various other places throughout the 84 | # built documents. 85 | 86 | import pkg_resources 87 | try: 88 | release = pkg_resources.get_distribution(project).version 89 | except pkg_resources.DistributionNotFound: 90 | print 'To build the documentation, The distribution information of pambox()' 91 | print 'Has to be available. Either install the package into your' 92 | print 'development environment or run "setup.py develop" to setup the' 93 | print 'metadata. A virtualenv is recommended!' 94 | sys.exit(1) 95 | del pkg_resources 96 | 97 | # The short X.Y version. 98 | version = '.'.join(release.split('.')[:2]) 99 | # The full version, including alpha/beta/rc tags. 100 | release = '' 101 | 102 | # The language for content autogenerated by Sphinx. Refer to documentation 103 | # for a list of supported languages. 104 | #language = None 105 | 106 | # There are two options for replacing |today|: either, you set today to some 107 | # non-false value, then it is used: 108 | #today = '' 109 | # Else, today_fmt is used as the format for a strftime call. 
110 | #today_fmt = '%B %d, %Y' 111 | 112 | # List of patterns, relative to source directory, that match files and 113 | # directories to ignore when looking for source files. 114 | exclude_patterns = ['_build', 'tests'] 115 | 116 | # The reST default role (used for this markup: `text`) to use for all documents. 117 | #default_role = None 118 | 119 | # If true, '()' will be appended to :func: etc. cross-reference text. 120 | #add_function_parentheses = True 121 | 122 | # If true, the current module name will be prepended to all description 123 | # unit titles (such as .. function::). 124 | #add_module_names = True 125 | 126 | # If true, sectionauthor and moduleauthor directives will be shown in the 127 | # output. They are ignored by default. 128 | #show_authors = False 129 | 130 | # The name of the Pygments (syntax highlighting) style to use. 131 | pygments_style = 'sphinx' 132 | 133 | # A list of ignored prefixes for module index sorting. 134 | #modindex_common_prefix = [] 135 | 136 | 137 | # -- Options for HTML output --------------------------------------------------- 138 | 139 | # The theme to use for HTML and HTML Help pages. See the documentation for 140 | # a list of builtin themes. 141 | import os 142 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 143 | if on_rtd: 144 | html_theme = 'default' 145 | else: 146 | html_theme = 'alabaster' 147 | 148 | # Theme options are theme-specific and customize the look and feel of a theme 149 | # further. For a list of options available for each theme, see the 150 | # documentation. 151 | #html_theme_options = {} 152 | 153 | # Add any paths that contain custom themes here, relative to this directory. 154 | #html_theme_path = [] 155 | 156 | # The name for this set of Sphinx documents. If None, it defaults to 157 | # " v documentation". 158 | #html_title = None 159 | 160 | # A shorter title for the navigation bar. Default is the same as html_title. 
161 | #html_short_title = None 162 | 163 | # The name of an image file (relative to this directory) to place at the top 164 | # of the sidebar. 165 | #html_logo = None 166 | 167 | # The name of an image file (within the static path) to use as favicon of the 168 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 169 | # pixels large. 170 | #html_favicon = None 171 | 172 | # Add any paths that contain custom static files (such as style sheets) here, 173 | # relative to this directory. They are copied after the builtin static files, 174 | # so a file named "default.css" will overwrite the builtin "default.css". 175 | html_static_path = ['_static'] 176 | 177 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 178 | # using the given strftime format. 179 | #html_last_updated_fmt = '%b %d, %Y' 180 | 181 | # If true, SmartyPants will be used to convert quotes and dashes to 182 | # typographically correct entities. 183 | #html_use_smartypants = True 184 | 185 | # Custom sidebar templates, maps document names to template names. 186 | #html_sidebars = {} 187 | 188 | # Additional templates that should be rendered to pages, maps page names to 189 | # template names. 190 | #html_additional_pages = {} 191 | 192 | # If false, no module index is generated. 193 | #html_domain_indices = True 194 | 195 | # If false, no index is generated. 196 | #html_use_index = True 197 | 198 | # If true, the index is split into individual pages for each letter. 199 | #html_split_index = False 200 | 201 | # If true, links to the reST sources are added to the pages. 202 | #html_show_sourcelink = True 203 | 204 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 205 | #html_show_sphinx = True 206 | 207 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
208 | #html_show_copyright = True 209 | 210 | # If true, an OpenSearch description file will be output, and all pages will 211 | # contain a tag referring to it. The value of this option must be the 212 | # base URL from which the finished HTML is served. 213 | #html_use_opensearch = '' 214 | 215 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 216 | #html_file_suffix = None 217 | 218 | # Output file base name for HTML help builder. 219 | htmlhelp_basename = 'pamboxdoc' 220 | 221 | 222 | # -- Options for LaTeX output -------------------------------------------------- 223 | 224 | latex_elements = { 225 | # The paper size ('letterpaper' or 'a4paper'). 226 | #'papersize': 'letterpaper', 227 | 228 | # The font size ('10pt', '11pt' or '12pt'). 229 | #'pointsize': '10pt', 230 | 231 | # Additional stuff for the LaTeX preamble. 232 | #'preamble': '', 233 | } 234 | 235 | # Grouping the document tree into LaTeX files. List of tuples 236 | # (source start file, target name, title, author, documentclass [howto/manual]). 237 | latex_documents = [ 238 | ('index', 'pambox.tex', u'pambox Documentation', 239 | u'Author', 'manual'), 240 | ] 241 | 242 | # The name of an image file (relative to this directory) to place at the top of 243 | # the title page. 244 | #latex_logo = None 245 | 246 | # For "manual" documents, if this is true, then toplevel headings are parts, 247 | # not chapters. 248 | #latex_use_parts = False 249 | 250 | # If true, show page references after internal links. 251 | #latex_show_pagerefs = False 252 | 253 | # If true, show URL addresses after external links. 254 | #latex_show_urls = False 255 | 256 | # Documents to append as an appendix to all manuals. 257 | #latex_appendices = [] 258 | 259 | # If false, no module index is generated. 260 | #latex_domain_indices = True 261 | 262 | 263 | # -- Options for manual page output -------------------------------------------- 264 | 265 | # One entry per manual page. 
List of tuples 266 | # (source start file, name, description, authors, manual section). 267 | man_pages = [ 268 | ('index', 'pambox', u'pambox Documentation', 269 | [u'Author'], 1) 270 | ] 271 | 272 | # If true, show URL addresses after external links. 273 | #man_show_urls = False 274 | 275 | 276 | # -- Options for Texinfo output ------------------------------------------------ 277 | 278 | # Grouping the document tree into Texinfo files. List of tuples 279 | # (source start file, target name, title, author, 280 | # dir menu entry, description, category) 281 | texinfo_documents = [ 282 | ('index', 'pambox', u'pambox Documentation', 283 | u'Alexandre Chabot-Leclerc', 'pambox', 'One line description of project.', 284 | 'Miscellaneous'), 285 | ] 286 | 287 | # Documents to append as an appendix to all manuals. 288 | #texinfo_appendices = [] 289 | 290 | # If false, no module index is generated. 291 | #texinfo_domain_indices = True 292 | 293 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 294 | #texinfo_show_urls = 'footnote' 295 | 296 | 297 | # -- Options for Epub output --------------------------------------------------- 298 | 299 | # Bibliographic Dublin Core info. 300 | epub_title = u'pambox' 301 | epub_author = u'Author' 302 | epub_publisher = u'Author' 303 | epub_copyright = u'2014, Author' 304 | 305 | # The language of the text. It defaults to the language option 306 | # or en if the language is not set. 307 | #epub_language = '' 308 | 309 | # The scheme of the identifier. Typical schemes are ISBN or URL. 310 | #epub_scheme = '' 311 | 312 | # The unique identifier of the text. This can be a ISBN number 313 | # or the project homepage. 314 | #epub_identifier = '' 315 | 316 | # A unique identification for the text. 317 | #epub_uid = '' 318 | 319 | # A tuple containing the cover image and cover page html template filenames. 320 | #epub_cover = () 321 | 322 | # HTML files that should be inserted before the pages created by sphinx. 
323 | # The format is a list of tuples containing the path and title. 324 | #epub_pre_files = [] 325 | 326 | # HTML files shat should be inserted after the pages created by sphinx. 327 | # The format is a list of tuples containing the path and title. 328 | #epub_post_files = [] 329 | 330 | # A list of files that should not be packed into the epub file. 331 | #epub_exclude_files = [] 332 | 333 | # The depth of the table of contents in toc.ncx. 334 | #epub_tocdepth = 3 335 | 336 | # Allow duplicate toc entries. 337 | #epub_tocdup = True 338 | -------------------------------------------------------------------------------- /docs/distort/index.rst: -------------------------------------------------------------------------------- 1 | Signal Distortion and Processing 2 | ================================ 3 | 4 | The :mod:`~pambox.distort` module groups together various distortions and 5 | types of processing that can be applied to signals. 6 | 7 | 8 | * :func:`~pambox.distort.mix_noise` adds together two signals at a given SNR. 9 | * :func:`~pambox.distort.noise_from_signal` creates a noise with the same 10 | spectrum as the input signal. Optionally, it can also keep the signal's 11 | envelope. 12 | * :func:`~pambox.distort.overlap_and_add` reconstructs a signal using the 13 | overlap and add method. 14 | * :func:`~pambox.distort.phase_jitter` applies phase jitter to a signal. 15 | * :func:`~pambox.distort.spec_sub` applies spectral subtraction to a signal. 16 | 17 | 18 | API 19 | --- 20 | 21 | .. automodule:: pambox.distort 22 | :members: 23 | 24 | 25 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | pambox 2 | ====== 3 | 4 | `pambox `_ is a Python package to 5 | facilitate the development of auditory models, with a focus on speech 6 | intelligibility prediction models. 
7 | 8 | The Grand Idea is for `pambox` to be a repository of published auditory models, 9 | as well as a simple and powerful tool for developing auditory models. 10 | Components should be reusable and easy to modify. 11 | `pambox` uses a standard interface for all speech intelligibility prediction 12 | models in the package, which should simplify comparisons across models. 13 | 14 | In case Python is not your thing and you prefer Matlab, the `Auditory Modeling 15 | Toolbox `_ is an excellent alternative. 16 | 17 | Installing 18 | ---------- 19 | 20 | Right now, `pambox` is only available through Github. It should be available 21 | via `pip` soon. To install `pambox` from source:: 22 | 23 | git clone https://github.com/achabotl/pambox.git 24 | cd pambox 25 | python setup.py install 26 | 27 | You'll also need all the requirements in `requirements.txt `_. If you use `conda 29 | `_, you can simply run the 30 | following to install all the dependencies:: 31 | 32 | conda install --file requirements.txt 33 | 34 | 35 | Structure of the toolbox 36 | ------------------------ 37 | 38 | The structure of the toolbox is inspired by the auditory system. The classes 39 | and functions are split between "peripheral" and "central" parts. The 40 | "peripheral" part is directly accessible through an :mod:`~pambox.inner`, 41 | a :mod:`~pambox.middle`, and an :mod:`~pambox.outer` module. 42 | The :mod:`~pambox.central` part is more general and contains the 43 | modules and functions for central processes, without much extra separation 44 | for now. 45 | 46 | The :mod:`~pambox.speech` module contains speech intelligibility models and 47 | various functions and classes to facilitate speech intelligibility prediction 48 | experiments. 49 | 50 | The :mod:`~pambox.utils` module contains functions for manipulating 51 | signals, such as setting levels, or padding signals, that are not directly 52 | auditory processes. 
53 | 54 | The :mod:`~pambox.distort` module contains distortions and processes that 55 | can be applied to signals. Most of them are used in speech intelligibility 56 | experiments. 57 | 58 | The :mod:`~pambox.audio` module is a thin wrapper around `pyaudio 59 | `_ that simplifies the playback of 60 | numpy arrays, which is often useful for debugging. 61 | 62 | Conventions 63 | ----------- 64 | In the `spirit of Python `_, pambox has 65 | a few conventions about "the way to do things". 66 | 67 | * Single channels signals always have the shape ``(N, )``, where `N` is the 68 | length of the signal. 69 | * Multi-channels signals always have the shape ``(M, N)``, where `M` is the 70 | number of channels and `N` is the signals' length. This greatly simplifies 71 | looping over channels. 72 | * Filterbanks are classes with names ending in ``Filterbank`` and must take 73 | at least the sampling frequency and the center frequencies as 74 | arguments, for example: ``GeneralFilterbank(fs, center_frequencies=(100, 75 | 200), *args, **kgwars)``. ``center_frequencies`` can have a default 76 | value. Filtering is done via a ``filter`` method that only takes the 77 | signal to filter and return multi-channel signals, for example: 78 | ``GeneralFilterbank(fs=44100).filter(x)`` returns a ``(M, N)`` array, where 79 | ``M`` can be 1. 80 | * Speech intelligibility models are classes with a ``predict`` method. See 81 | :ref:`speech-intelligibility-models` for more details. 82 | 83 | 84 | Contents 85 | -------- 86 | 87 | .. 
toctree:: 88 | :maxdepth: 2 89 | 90 | audio/index 91 | inner/index 92 | middle/index 93 | outer/index 94 | central/index 95 | speech/index 96 | distort/index 97 | utils/index 98 | 99 | 100 | Indices and tables 101 | ================== 102 | 103 | * :ref:`genindex` 104 | * :ref:`modindex` 105 | * :ref:`search` 106 | 107 | -------------------------------------------------------------------------------- /docs/inner/index.rst: -------------------------------------------------------------------------------- 1 | Inner ear processing 2 | ==================== 3 | 4 | This module groups properties and processes of the inner ear, namely 5 | peripheral filtering and envelope extraction. 6 | 7 | Filterbanks 8 | ----------- 9 | 10 | All filterbanks provide a ``filter()`` method that takes only the input signal. 11 | The filterbank's parameters must be defined when creating the filterbank. For example, 12 | here we create a Gammatone filterbank for a sampling frequency of 44.1 kHz and a sequence 13 | of octave-spaced center frequencies:: 14 | 15 | >>> import numpy as np 16 | >>> from pambox.inner import GammatoneFilterbank 17 | >>> g = GammatoneFilterbank(44100, [250, 500, 1000, 2000, 4000]) 18 | >>> x = np.random.randn(2 * 44100) 19 | >>> y = g.filter(x) 20 | >>> y.shape 21 | (5, 88200) 22 | 23 | * :class:`~pambox.inner.GammatoneFilterbank` is a gammatone filterbank which uses Malcom Slaney's implementation. 24 | * :class:`~pambox.inner.RectangularFilterbank` performs bandpass filtering of a signal using rectangular filters. 25 | 26 | 27 | Envelope extraction 28 | ------------------- 29 | 30 | * :func:`~pambox.inner.hilbert_envelope` extracts the Hilbert envelope of a 31 | signal. 32 | * :func:`~pambox.inner.lowpass_env_filtering` low-pass filters a signal using 33 | a Butterworth filter. 34 | 35 | 36 | Other functions 37 | --------------- 38 | 39 | * :func:`~pambox.inner.erb_bandwidth` gives the ERB bandwidth for a given center 40 | frequencies. 
41 | 42 | 43 | API 44 | --- 45 | 46 | .. automodule:: pambox.inner 47 | :members: 48 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. 
doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 
100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pambox.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pambox.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 
161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /docs/middle/index.rst: -------------------------------------------------------------------------------- 1 | Middle ear processes 2 | ==================== 3 | 4 | There's nothing here right now. 5 | 6 | API 7 | --- 8 | 9 | .. automodule:: pambox.middle 10 | :members: 11 | -------------------------------------------------------------------------------- /docs/outer/index.rst: -------------------------------------------------------------------------------- 1 | Outer ear processes 2 | =================== 3 | 4 | There's nothing here right now, but there should be some things to access 5 | HRTF databases pretty soon. 6 | 7 | API 8 | --- 9 | 10 | .. 
automodule:: pambox.outer 11 | :members: 12 | -------------------------------------------------------------------------------- /docs/speech/index.rst: -------------------------------------------------------------------------------- 1 | Speech Intelligibility Models and Experiments 2 | ============================================= 3 | 4 | Introduction 5 | ------------ 6 | 7 | The :mod:`~pambox.speech` module groups together speech intelligibility models 8 | and other tools to facilitate the creation of speech intelligibility 9 | prediction "experiments". 10 | 11 | 12 | .. _speech-intelligibility-models: 13 | 14 | Speech Intelligibility Models 15 | ----------------------------- 16 | 17 | Speech intelligibility models are classes that take at least a ``fs`` 18 | argument. All predictions are done via a ``predict`` method with the 19 | signature: ``predict(clean=None, mix=None, noise=None)``. 20 | This signature allows models to require only a subset of the inputs. For example, 21 | blind models might only require the mixture of processed speech and noise: ``predict(mix=noisy_speech)``; or just the 22 | clean signal and the noise: ``predict(clean=speech, noise=noise)``. 23 | 24 | The reference level is that a signal with an RMS value of 1 corresponds to 0 dB SPL. 25 | 26 | Here is a small example, assuming that we have access to two signals, ``mix`` which is a mixture of the clean speech 27 | and the noise, and ``noise``, which is the noise alone. 28 | 29 | :: 30 | 31 | >>> from pambox.speech import Sepsm 32 | >>> s = Sepsm(fs=22050) 33 | >>> res = s.predict(mix=mix, noise=noise) 34 | 35 | 36 | For models that do not take time signals as inputs, 37 | such as the :py:class:`~pambox.speech.Sii`, two other types of interfaces are 38 | defined: 39 | 40 | * ``predict_spec`` if the model takes frequency spectra as its inputs. 
Once 41 | again, the spectra of the clean speech, of the mixture, and of the noise 42 | should be provided:: 43 | 44 | >>> from pambox.speech import Sii 45 | >>> s = Sii(fs=22050) 46 | >>> res = s.predict_spec(clean=clean_spec, noise=noise_spec) 47 | 48 | 49 | * ``predict_ir`` if the models takes impulse responses as its inputs. The 50 | function then takes two inputs, the impulse response to the target, 51 | and the concatenated impulse responses to the maskers:: 52 | 53 | >>> from pambox.speech import IrModel 54 | >>> s = IrModel(fs=22050) 55 | >>> res = s.predict_ir(clean_ir, noise_irs) 56 | 57 | Intelligibility models return a dictionary with **at least** the following key: 58 | 59 | * ``p`` (for "predictions"): which is a dictionary with the outputs of the 60 | model. They keys are the names of the outputs. This allows models to have 61 | multiple return values. For example, the :py:class:`~pambox.speech.MrSepsm` 62 | returns two prediction values:: 63 | 64 | >>> s = MrSepsm(fs=22050) 65 | >>> res = s.predict(clean, mix, noise) 66 | >>> res['p'] 67 | {'lt_snr_env': 10.5, 'snr_env': 20.5} 68 | 69 | It might seem a bit over-complicated, but it allows for an easier storing of 70 | the results of an experiment. 71 | 72 | Additionally, the models can add any other keys to the results dictionary. For 73 | example, a model can return some of its internal attributes, its internal 74 | representation, etc. 75 | 76 | 77 | .. _speech-materials: 78 | 79 | Speech Materials 80 | ---------------- 81 | 82 | The :py:class:`~pambox.speech.Material` class simplifies the 83 | access to the speech files when doing speech intelligibility prediction 84 | experiments. 
85 | 86 | When creating the class, you have to define: 87 | 88 | * where the sentences can be found 89 | * their sampling frequency 90 | * their reference level, in dB SPL (the reference is that a signal with an 91 | RMS value of 1 corresponds to 0 dB SPL), 92 | * as well as the path to a file where the corresponding speech-shaped noise for 93 | this particular material can be found. 94 | 95 | For example, to create a speech material object for IEEE sentences stored in 96 | the `../stimuli/ieee` folder:: 97 | 98 | >>> sm = SpeechMaterial( 99 | ... fs=25000, 100 | ... path_to_sentences='../stimuli/ieee', 101 | ... path_to_ssn='ieee_ssn.wav', 102 | ... ref_level=74, 103 | ... name='IEEE' 104 | ... ) 105 | 106 | Each speech file can be loaded using its name:: 107 | 108 | >>> x = sm.load_file(sm.files[0]) 109 | 110 | Or files can be loaded as an iterator:: 111 | 112 | >>> all_files = sm.load_files() 113 | >>> for x in all_files: 114 | ... # do some processing on `x` 115 | ... pass 116 | 117 | 118 | By default, the list of files is simply all the files found in 119 | the ``path_to_sentences``. To overwrite this behavior, simply replace the 120 | :py:func:`~pambox.speech.Material.files_list` function:: 121 | 122 | >>> def new_files_list(): 123 | ... return ['file1.wav', 'file2.wav'] 124 | >>> sm.files_list = new_files_list 125 | 126 | It is common that individual sentences of a speech material are not adjusted 127 | to the exact same level. This is typically done to compensate for differences 128 | in intelligibility between sentences. In order to keep the inter-sentence 129 | level difference, it is recommended to use the 130 | :py:func:`~pambox.speech.Material.set_level` method of the speech material. 131 | The code below sets the level of the first sentence to 65 dB SPL, 132 | with the reference that a signal with an RMS value of 1 has a level of 0 dB SPL.
133 | 134 | >>> x = sm.load_file(sm.files[0]) 135 | >>> adjusted_x = sm.set_level(x, 65) 136 | 137 | Accessing the speech-shaped noise corresponding to the speech material is done 138 | using the :func:`~pambox.speech.Material.ssn` function: 139 | 140 | >>> ieee_ssn = sm.ssn() 141 | 142 | By default, this will return the entirety of the SSN. However, it is often 143 | required to select a section of noise that is the same length as a target 144 | speech signal, therefore, you can get a random portion of the SSN of the same 145 | length as the signal `x` using: 146 | 147 | >>> ssn_section = sm.ssn(x) 148 | 149 | If you are given a speech material but you don't know its average level, you 150 | can use the helper function :func:`~pambox.speech.Material.average_level` to 151 | find the average level, in dB, of all the sentences in the speech material: 152 | 153 | >>> average_level = sm.average_level() 154 | 155 | .. _speech-intelligibility-experiments: 156 | 157 | Speech Intelligibility Experiment 158 | --------------------------------- 159 | 160 | Performing speech intelligibility experiments usually involves a tedious 161 | process of looping through all conditions to study, such as different SNRs, 162 | processing conditions, and sentences. The :class:`~pambox.speech.Experiment` 163 | class simplifies and automates the process of going through all the 164 | experimental conditions. It also gathers all the results in a way that is 165 | simple to manipulate, transform, and plot. 166 | 167 | Basic Example 168 | ~~~~~~~~~~~~~ 169 | 170 | An experiment requires at least: a model, a speech material, and a list of SNRs.
171 | 172 | >>> from pambox.speech import Experiment, Sepsm, Material 173 | >>> models = Sepsm() 174 | >>> material = Material() 175 | >>> snrs = np.arange(-9,-5, 3) 176 | >>> exp = Experiment(models, material, snrs, write=False) 177 | >>> df = exp.run(2) 178 | >>> df 179 | Distortion params Model Output SNR Sentence number Value 180 | 0 None Sepsm snr_env -9 0 1.432468 181 | 1 None Sepsm snr_env -6 0 5.165170 182 | 2 None Sepsm snr_env -9 1 6.308387 183 | 3 None Sepsm snr_env -6 1 10.314227 184 | 185 | Additionally, you can assign a type of processing, such as reverberation, 186 | spectral subtraction, or any arbitrary type of processing. To keep things 187 | simple, let's apply a compression to the mixture and to the noise. Your 188 | distortion function *must return* the clean speech, the mixture, and the 189 | noise alone. 190 | 191 | >>> def compress(clean, noise, power): 192 | ... mixture = (clean + noise) ** (1 / power) 193 | ... noise = noise ** (1 / power) 194 | ... return clean, mixture, noise 195 | ... 196 | >>> powers = range(1, 4) 197 | >>> exp = Experiment(models, material, snrs, compress, powers) 198 | >>> df = exp.run(2) 199 | >>> df 200 | 201 | 202 | If the distortion parameters are stored in a list of dictionaries, 203 | they will be saved in separate columns in the output dataframe. Otherwise, 204 | they will be saved as tuples in the "Distortion params" column. 205 | 206 | 207 | API 208 | --- 209 | 210 | .. automodule:: pambox.speech 211 | :members: 212 | -------------------------------------------------------------------------------- /docs/utils/index.rst: -------------------------------------------------------------------------------- 1 | Utilities 2 | ========= 3 | 4 | The :mod:`~pambox.utils` groups together functions that are not auditory 5 | processes but that are, nonetheless, useful or essential to manipulate signals. 6 | 7 | Signal levels 8 | ------------- 9 | 10 | `pambox` defines a reference level for digital signals.
The convention is 11 | that a signal with a root-mean-square (RMS) value of 1 corresponds to a level 12 | of 0 dB SPL. In other words: 13 | 14 | .. math:: 15 | L [dB SPL] = 20 * \log_{10}\frac{P}{Ref}, 16 | 17 | where :math:`Ref` is 1. 18 | 19 | The functions :py:func:`~pambox.utils.setdbspl`, 20 | :py:func:`~pambox.utils.dbspl`, and :py:func:`~pambox.utils.rms` help in 21 | doing this conversion. 22 | 23 | Adding signals and adjusting their lengths 24 | ------------------------------------------ 25 | 26 | Adding together signals loaded from disk is often problematic because they 27 | tend to have different lengths. The functions 28 | :py:func:`~pambox.utils.add_signals` and 29 | :py:func:`~pambox.utils.make_same_length` simplify this. The former simply 30 | adds two signals and pads the shortest one with zeros if necessary. The 31 | latter forces two signals to be of the same length by either zero-padding the 32 | shortest (default) or by cutting the second signal to the length of the 33 | first, for example:: 34 | 35 | >>> a = [1, 1] 36 | >>> b = [2, 2, 2] 37 | >>> make_same_length(a, b) 38 | [1, 1, 0], [2, 2, 2] 39 | >>> make_same_length(a, b, extend_first=False) 40 | [1, 1], [2, 2] 41 | 42 | This can be useful when using models operating in the envelope domain, 43 | as padding with zeros increases the energy at low modulation frequencies. 44 | 45 | 46 | The :py:func:`~pambox.utils.int2srt` function finds the speech reception 47 | threshold (SRT) for a given intelligibility curve. It is actually a more 48 | general linear interpolation function, but the most common use case in this 49 | toolbox is to find SRTs. 50 | 51 | The function :py:func:`~pambox.utils.psy_fn` calculates a psychometric 52 | function based on a mean (that would be the SRT @ 50%) and a standard 53 | deviation. This function can be useful when trying to fit a psychometric 54 | function to a series of data points.
55 | 56 | 57 | FFT Filtering and general speedups 58 | ---------------------------------- 59 | 60 | FIR filtering is rather slow when using long impulse responses. The function 61 | :py:func:`~pambox.utils.fftfilt` makes such filtering faster by executing the 62 | filtering using the overlap-and-add method in the frequency domain 63 | rather than as a convolution. It is largely inspired from the Matlab 64 | implementation and was adapted from a `suggested addition to Scipy 65 | `_. 66 | It might be removed from the toolbox if `fftfilt` becomes a part of Scipy. 67 | 68 | The function :py:func:`~pambox.utils.next_pow_2` is a convenient way to 69 | obtain the next power of two for a given integer. It's mostly useful when 70 | picking an FFT length. 71 | 72 | API 73 | --- 74 | 75 | .. automodule:: pambox.utils 76 | :members: 77 | -------------------------------------------------------------------------------- /examples/.place_holder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/achabotl/pambox/be1af869538e9e1781f990a9036856cb1153b58d/examples/.place_holder -------------------------------------------------------------------------------- /optional-requirements.txt: -------------------------------------------------------------------------------- 1 | --allow-external PyAudio 2 | --allow-unverified PyAudio 3 | PyAudio 4 | -------------------------------------------------------------------------------- /pambox/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import logging 4 | 5 | __all__ = ['auditory', 'distort', 'filterbank', 'general', 'idealobs', 6 | 'speech'] 7 | 8 | __version__ = '0.1' 9 | 10 | logging.getLogger(__name__).addHandler(logging.NullHandler()) 11 | 12 | -------------------------------------------------------------------------------- 
/pambox/audio.py: -------------------------------------------------------------------------------- 1 | """ 2 | :mod:`~pambox.audio` provides a simple wrapper around `pyaudio` to simplify 3 | sound playback. 4 | """ 5 | from __future__ import absolute_import, division, print_function 6 | 7 | import numpy as np 8 | 9 | try: 10 | import pyaudio 11 | except ImportError: 12 | raise ImportError("pyaudio is required is you want to use pambox.audio") 13 | 14 | 15 | def play(x, fs=44100, normalize=True): 16 | """Plays sound. 17 | 18 | Parameters 19 | ---------- 20 | x : array_like, 21 | Signal to be played. The shape should be nChannels x Length. 22 | fs : int (optional) 23 | Sampling frequency. The default is 44100 Hz. 24 | normalize : bool (optional) 25 | Normalize the signal such that the maximum (absolute value) is 1 to 26 | prevent clipping. The default is True. 27 | 28 | Examples 29 | -------- 30 | 31 | To playback a numpy array: 32 | 33 | >>> from pambox import audio 34 | >>> import numpy as np 35 | >>> audio.play(np.random.randn(10000)) 36 | 37 | """ 38 | x = np.asarray(x) 39 | if normalize: 40 | x = x / np.abs(x).max() 41 | if x.shape[0] == 2: 42 | x = x.T 43 | channels = x.ndim 44 | _play_sound(x, fs=fs, channels=channels) 45 | 46 | 47 | def _play_sound(x, fs=44100, channels=1, output=1, format_=pyaudio.paFloat32): 48 | """Wrapper around PyAudio to play numpy arrays. 49 | 50 | Parameters 51 | ---------- 52 | x : ndarray 53 | Signal 54 | fs : int 55 | Sampling frequency, default is 44100 Hz. 56 | channels : int 57 | Number of channels, default is 1. 58 | output : int 59 | ID of the soundcard where to play back. Default is 1. 60 | format_: pyaudio Format object 61 | Format of the signal data, the default is `pyaudio.paFloat32`. 
62 | 63 | Returns 64 | ------- 65 | None 66 | 67 | """ 68 | p = pyaudio.PyAudio() 69 | stream = p.open(format=format_, channels=channels, rate=fs, 70 | output=output) 71 | stream.write(x.astype(np.float32).tostring()) 72 | stream.close() 73 | p.terminate() 74 | -------------------------------------------------------------------------------- /pambox/central/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """:mod:`~pambox.central` contains processes performed by the 'central' auditory system. 3 | 4 | Classes 5 | ------- 6 | 7 | - :py:class:`~EC` -- An Equalization--Cancellation stage, as used by [wan2014]_. 8 | - :py:class:`~EPSMModulationFilterbank` -- EPSM modulation filterbank, as used by [jorgensen2011]_. 9 | - :py:class:`~IdealObs` -- An IdealObserver, as used by [jorgensen2011]_. 10 | 11 | 12 | """ 13 | from __future__ import absolute_import, division, print_function 14 | 15 | __all__ = ( 16 | 'EC', 17 | 'EPSMModulationFilterbank', 18 | 'IdealObs', 19 | ) 20 | 21 | from .decision_metrics import IdealObs 22 | from .ec import EC 23 | from .modulation_filterbanks import EPSMModulationFilterbank 24 | 25 | -------------------------------------------------------------------------------- /pambox/central/decision_metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import logging 3 | 4 | import numpy as np 5 | from scipy.optimize import leastsq 6 | from scipy.stats import norm 7 | 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | 12 | class IdealObs(object): 13 | """Statistical ideal observer. 14 | 15 | Converts input values (usually SNRenv) to a percentage. 
16 | 17 | Parameters 18 | ---------- 19 | k : float, optional 20 | (Default value = sqrt(1.2) 21 | q : float, optional 22 | (Default value = 0.5) 23 | sigma_s : float, optional 24 | (Default value = 0.6) 25 | m : int, optional 26 | Number of words in the vocabulary. (Default value = 8000) 27 | 28 | Notes 29 | ----- 30 | Implemented as described in [jorgensen2011]_. 31 | 32 | Examples 33 | -------- 34 | 35 | Converting values to percent correct using the default parameters 36 | of the ideal observer: 37 | 38 | >>> from pambox import central 39 | >>> obs = central.IdealObs() 40 | >>> obs.transform((0, 1, 2, 3)) 41 | 42 | """ 43 | def __init__(self, k=np.sqrt(1.2), q=0.5, sigma_s=0.6, m=8000.): 44 | self.k = k 45 | self.q = q 46 | self.sigma_s = sigma_s 47 | self.m = m 48 | 49 | def get_params(self): 50 | """Returns the parameters of the ideal observer as dict. 51 | 52 | Parameters 53 | ---------- 54 | None 55 | 56 | Returns 57 | ------- 58 | params : dict 59 | Dictionary of internal parameters of the ideal observer. 60 | """ 61 | return {'k': self.k, 'q': self.q, 'sigma_s': self.sigma_s, 'm': self.m} 62 | 63 | def fit_obs(self, values, pcdata, sigma_s=None, m=None, tries=10): 64 | """Finds the parameters of the ideal observer. 65 | 66 | Finds the paramaters ``k``, ``q``, and ``sigma_s``, that minimize the 67 | least-square error between a data set and transformed SNRenv. 68 | 69 | By default the ``m`` parameter is fixed and the property ``m`` is used. 70 | It can also be defined as an optional parameter. 71 | 72 | It is also possible to fix the `sigma_s` parameter by passing it as 73 | an optional argument. Otherwise, it is optimized with `k` and `q`. 74 | 75 | Parameters 76 | ---------- 77 | values : ndarray 78 | The linear SNRenv values that are to be converted to percent 79 | correct. 80 | pcdata : ndarray 81 | The data, in percentage between 0 and 1, of correctly understood 82 | tokens. Must be the same shape as `values`. 
83 | sigma_s : float, optional 84 | (Default value = None) 85 | m : float, optional 86 | (Default value = None) 87 | tries : int, optional 88 | How many attempts to fit the observer if the start values do not 89 | converge. The default is 10 times. 90 | 91 | Returns 92 | ------- 93 | self 94 | 95 | """ 96 | 97 | values = np.asarray(values) 98 | pcdata = np.asarray(pcdata) 99 | 100 | if m is None: 101 | m = self.m 102 | else: 103 | self.m = m 104 | 105 | # Set default values for optimization 106 | p0 = [self.k, self.q, self.sigma_s] 107 | fixed_params = {'m': m} 108 | if sigma_s is not None: 109 | p0 = p0[:2] 110 | fixed_params['sigma_s'] = sigma_s 111 | 112 | # Reshape the array to have `N` predictions and define the cost 113 | # function to average over those predictions. 114 | if values.shape != pcdata.shape: 115 | values = values.reshape((-1, len(pcdata))) 116 | 117 | def errfc(p, fixed): 118 | return np.mean(self._transform(values, *p, **fixed), axis=0 119 | ) - pcdata 120 | # They have the same shape, the array should not be averaged 121 | else: 122 | def errfc(p, fixed): 123 | return self._transform(values, *p, **fixed) - pcdata 124 | 125 | for try_id in range(tries): 126 | (x, _, _, errmsg, ier) = leastsq(errfc, p0, args=fixed_params, 127 | maxfev=10000, full_output=True) 128 | if ier in [1, 2, 3, 4]: 129 | break 130 | else: 131 | p0 = 2 * np.random.random_sample(len(p0)) 132 | log.error("Optimal parameters not found: " + errmsg) 133 | 134 | if sigma_s: 135 | self.k, self.q = x 136 | self.sigma_s = sigma_s 137 | else: 138 | self.k, self.q, self.sigma_s = x 139 | return self 140 | 141 | @staticmethod 142 | def _transform(values, k=None, q=None, sigma_s=None, m=None): 143 | """Converts SNRenv values to percent correct using an ideal observer. 
144 | 145 | Parameters 146 | ---------- 147 | values : array_like 148 | linear values of SNRenv 149 | k : float 150 | k parameter (Default value = None) 151 | q : float 152 | q parameter (Default value = None) 153 | sigma_s : float 154 | sigma_s parameter (Default value = None) 155 | m : float 156 | m parameter, number of words in the vocabulary. (Default value = 157 | None) 158 | 159 | Returns 160 | ------- 161 | pc : ndarray 162 | Array of intelligibility percentage values, of the same shape as 163 | `values`. 164 | 165 | """ 166 | un = norm.ppf(1.0 - 1.0 / m) 167 | sn = 1.28255 / un 168 | un += 0.577 / un 169 | dp = k * values ** q 170 | return norm.cdf(dp, un, np.sqrt(sigma_s ** 2 + sn ** 2)) * 100 171 | 172 | def transform(self, values): 173 | """Converts inputs values to a percent correct. 174 | 175 | Parameters 176 | ---------- 177 | values : array_like 178 | Linear values to transform. 179 | 180 | Returns 181 | ------- 182 | pc : ndarray 183 | Array of intelligibility percentage values, of the same shape as 184 | `values`. 
185 | 186 | """ 187 | values = np.asarray(values) 188 | return self._transform(values, self.k, self.q, self.sigma_s, self.m) -------------------------------------------------------------------------------- /pambox/central/modulation_filterbanks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import logging 4 | 5 | import numpy as np 6 | from numpy import pi 7 | try: 8 | _ = np.use_fastnumpy # Use Enthought MKL optimizations 9 | from numpy.fft import fft, ifft, rfft, irfft 10 | except AttributeError: 11 | try: 12 | import mklfft # MKL FFT optimizations from Continuum Analytics 13 | from numpy.fft import fft, ifft, rfft, irfft 14 | except ImportError: 15 | # Finally, just use Numpy's and Scipy's 16 | from scipy.fftpack import fft, ifft 17 | from numpy.fft import rfft, irfft 18 | 19 | log = logging.getLogger(__name__) 20 | 21 | 22 | class EPSMModulationFilterbank(object): 23 | """Implementation of the EPSM modulation filterbank. 24 | 25 | Parameters 26 | ---------- 27 | fs : int 28 | Sampling frequency of the signal. 29 | modf : array_like 30 | List of the center frequencies of the modulation filterbank. 31 | q : float 32 | Q-factor of the modulation filters. Defaults to 1. 33 | low_pass_order : float 34 | Order of the low-pass filter. Defaults to 3. 35 | 36 | Methods 37 | ------- 38 | filter(signal) 39 | Filters the signal using the modulation filterbank. 40 | 41 | Notes 42 | ----- 43 | The envelope power spectrum model (EPSM) filterbank was defined in 44 | [ewert2000]_ and the implementation was validated against the Matlab 45 | implementation of [jorgensen2011]_. 46 | 47 | References 48 | ---------- 49 | .. [ewert2000] S. D. Ewert and T. Dau: Characterizing frequency 50 | selectivity for envelope fluctuations.. J. Acoust. Soc. Am. 108 51 | (2000) 1181--1196. 52 | 53 | .. [jorgensen2011] S. Jørgensen and T. 
Dau: Predicting speech 54 | intelligibility based on the signal-to-noise envelope power ratio 55 | after modulation-frequency selective processing. J. Acoust. Soc. Am. 56 | 130 (2011) 1475--1487. 57 | 58 | """ 59 | 60 | def __init__(self, fs, modf, q=1., low_pass_order=3.): 61 | self.fs = fs 62 | self.modf = np.asarray(modf) 63 | self.q = q # Q-factor of band-pass filters 64 | self.lp_order = low_pass_order # order of the low-pass filter 65 | 66 | def _calculate_coefficients(self, freqs): 67 | fcs = self.modf[1:] 68 | fcut = self.modf[0] 69 | # Initialize transfer function 70 | TFs = np.zeros((len(fcs) + 1, len(freqs))).astype('complex') 71 | # Calculating frequency-domain transfer function for each center 72 | # frequency: 73 | for k in range(len(fcs)): 74 | TFs[k + 1, 1:] = \ 75 | 1. / (1. + (1j * self.q * (freqs[1:] / fcs[k] - fcs[k] / 76 | freqs[1:]))) # p287 Hambley. 77 | 78 | # squared filter magnitude transfer functions 79 | Wcf = np.square(np.abs(TFs)) 80 | # Low-pass filter squared transfer function, third order Butterworth 81 | # filter 82 | # TF from: 83 | # http://en.wikipedia.org/wiki/Butterworth_filter 84 | Wcf[0, :] = 1 / ( 85 | 1 + ((2 * pi * freqs / (2 * pi * fcut)) ** (2 * self.lp_order))) 86 | # Transfer function of low-pass filter 87 | TFs[0, :] = np.sqrt(Wcf[0, :]) 88 | return TFs, Wcf 89 | 90 | def filter(self, signal): 91 | """ 92 | 93 | Parameters 94 | ---------- 95 | signal : ndarray 96 | Temporal envelope of a signal 97 | Returns 98 | ------- 99 | tuple of ndarray 100 | Integrated power spectrum at the output of each filter 101 | Filtered time signals. 
102 | """ 103 | 104 | # Make signal odd length 105 | signal = signal[0:-1] if (len(signal) % 2) == 0 else signal 106 | 107 | n = signal.shape[-1] # length of envelope signals 108 | X = fft(signal) 109 | X_mag = np.abs(X) 110 | X_power = np.square(X_mag) / n # power spectrum 111 | X_power_pos = X_power[0:np.floor(n / 2).astype('int') + 1] 112 | # take positive frequencies only and multiply by two to get the same total 113 | # energy 114 | X_power_pos[1:] = X_power_pos[1:] * 2 115 | 116 | pos_freqs = np.linspace(0, self.fs / 2, X_power_pos.shape[-1]) 117 | # Concatenate vector of 0:fs and -fs:1 118 | freqs = np.concatenate((pos_freqs, pos_freqs[-1:0:-1])) 119 | 120 | TFs, Wcf = self._calculate_coefficients(freqs) 121 | 122 | # initialize output product: 123 | vout = np.zeros((len(self.modf), len(pos_freqs))) 124 | powers = np.zeros(len(self.modf)) 125 | 126 | # ------------ DC-power, -------------------------- 127 | # here divide by two such that a fully modulated tone has an AC-power of 1. 128 | dc_power = X_power_pos[0] / n / 2 129 | # ------------------------------------------------ 130 | X_filt = np.zeros((Wcf.shape[0], X.shape[-1]), dtype='complex128') 131 | filtered_envs = np.zeros_like(X_filt, dtype='float') 132 | 133 | for k, (w, TF) in enumerate(zip(Wcf, TFs)): 134 | vout[k] = X_power_pos * w[:np.floor(n / 2).astype('int') + 1] 135 | # Integration estimated as a sum from f > 0 136 | # integrate envelope power in the pass-band of the filter. Index goes 137 | # from 2:end since integration is for f>0 138 | powers[k] = np.sum(vout[k, 1:]) / n / dc_power 139 | # Filtering and inverse Fourier transform to get time signal. 
140 | X_filt[k] = X * TF 141 | filtered_envs[k] = np.real(ifft(X_filt[k])) 142 | powers[np.isnan(powers)] = 0 143 | return powers, filtered_envs 144 | -------------------------------------------------------------------------------- /pambox/distort.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :mod:`pambox.distort` regroups various types of distortions and processings 4 | that can be applied to signals. 5 | """ 6 | from __future__ import absolute_import, division, print_function 7 | 8 | import numpy as np 9 | import scipy as sp 10 | from six.moves import zip 11 | from scipy.io import wavfile 12 | 13 | from pambox import utils 14 | from pambox.utils import fftfilt, hilbert 15 | import six 16 | 17 | try: 18 | _ = np.use_fastnumpy # MKL FFT optimizations from Enthought. 19 | from numpy.fft import fft, ifft, rfft, irfft 20 | except AttributeError: 21 | try: 22 | import mklfft # MKL FFT optimizations from Continuum Analytics 23 | from numpy.fft import fft, ifft, rfft, irfft 24 | except ImportError: 25 | from scipy.fftpack import fft, ifft 26 | from numpy.fft import rfft, irfft 27 | 28 | 29 | def mix_noise(clean, noise, sent_level, snr=None): 30 | """Mix a signal signal noise at a given signal-to-noise ratio. 31 | 32 | Parameters 33 | ---------- 34 | clean : ndarray 35 | Clean signal. 36 | noise : ndarray 37 | Noise signal. 38 | sent_level : float 39 | Sentence level, in dB SPL. 40 | snr : 41 | Signal-to-noise ratio at which to mix the signals, in dB. If snr is 42 | `None`, no noise is mixed with the signal (Default value = None) 43 | 44 | Returns 45 | ------- 46 | tuple of ndarrays 47 | Returns the clean signal, the mixture, and the noise. 
48 | 49 | """ 50 | 51 | # Pick a random section of the noise 52 | n_clean = len(clean) 53 | n_noise = len(noise) 54 | if n_noise > n_clean: 55 | start_idx = np.random.randint(n_noise - n_clean) 56 | noise = noise[start_idx:start_idx + n_clean] 57 | 58 | if snr is not None: 59 | # Get speech level and set noise level accordingly 60 | # clean_level = utils.dbspl(clean) 61 | # noise = utils.setdbspl(noise, clean_level - snr) 62 | noise = noise / utils.rms(noise) * 10 ** ((sent_level - snr) / 20) 63 | mix = clean + noise 64 | else: 65 | mix = clean 66 | 67 | return clean, mix, noise 68 | 69 | 70 | def phase_jitter(x, a): 71 | """ 72 | Apply phase jitter to a signal. 73 | 74 | The expression of phase jitter is: 75 | 76 | .. math:: y(t) = s(t) * cos(\Phi(t)), 77 | 78 | where :math:`\Phi(t)` is a random process uniformly distributed over 79 | :math:`[0, 2\pi\\alpha]`. The effect of the jitter when :math:`\\alpha` 80 | is 0.5 or 1 is to completely destroy the carrier signal, effectively 81 | yielding modulated white noise. 82 | 83 | Parameters 84 | ---------- 85 | x : ndarray 86 | Signal 87 | a : float 88 | Phase jitter parameter, typically between 0 and 1, but it can be 89 | anything. 90 | 91 | Returns 92 | ------- 93 | ndarray 94 | Processed signal of the same dimension as the input signal. 95 | 96 | """ 97 | n = len(x) 98 | return x * np.cos(2 * np.pi * a * np.random.random_sample(n)) 99 | 100 | 101 | def reverb(x, rt): 102 | """ 103 | Applies reverberation to a signal. 104 | 105 | Parameters 106 | ---------- 107 | x : ndarray 108 | Input signal. 109 | rt : float 110 | Reverberation time 111 | 112 | 113 | Returns 114 | ------- 115 | ndarray 116 | Processed signal. 117 | 118 | """ 119 | pass 120 | 121 | 122 | def spec_sub(x, noise, factor, w=1024 / 2., padz=1024 / 2., shift_p=0.5): 123 | """ 124 | Apply spectral subtraction to a signal. 125 | 126 | The defaul values of the parameters are typical for a sampling frequency of 127 | 44100 Hz. 
Note that (W+padz) is the final frame window and hence the fft 128 | length (it is normally chose as a power of 2). 129 | 130 | Parameters 131 | ---------- 132 | x : ndarray 133 | Input signal 134 | noise : 135 | Input noise signal 136 | factor : float 137 | Noise subtraction factor, must be larger than 0. 138 | w : int 139 | Frame length, in samples. (Default value = 1024 / 2.) 140 | padz : int 141 | Zero padding (pad with padz/2 from the left and the right) (Default 142 | value = 1024 / 2.) 143 | shift_p : float 144 | Shift percentage (overlap) between each window, in fraction of the 145 | window size (Default value = 0.5) 146 | 147 | Returns 148 | ------- 149 | clean_estimate : ndarray 150 | Estimate of the clean signal. 151 | noise_estimate : ndarray 152 | Estimate of the noisy signal. 153 | 154 | """ 155 | wnd = np.hanning(w + 2) # create hanning window with length = W 156 | wnd = wnd[1:-1] 157 | 158 | stim = np.vstack((x, noise)) 159 | 160 | len_signal = stim.shape[-1] # Signal length 161 | shift_p_indexes = np.floor(w * shift_p) 162 | n_segments = np.floor((len_signal - w) / shift_p_indexes + 1) 163 | len_segment = w + padz * 2 * shift_p 164 | y = np.empty((2, n_segments, len_segment)) 165 | # Initialize arrays for spectral subtraction. Use only positive 166 | # frequencies. 
167 | Y_hat = np.empty((n_segments, len_segment / 2 + 1)) 168 | PN_hat = Y_hat.copy() 169 | 170 | # For each signal 171 | for k in range(2): 172 | # CUT THE APPROPRIATE SIGNAL FRAMES 173 | indexes = np.tile(np.arange(w), (n_segments, 1)) 174 | index_shift = np.arange(n_segments) * shift_p_indexes 175 | indexes = indexes + index_shift[:, np.newaxis] 176 | y_tmp = stim[k] 177 | y_tmp = y_tmp[indexes.astype('int')] * wnd 178 | # PAD WITH ZEROS 179 | pad = np.zeros((n_segments, padz / 2)) 180 | y_pad = np.hstack((pad, y_tmp, pad)) 181 | y[k, :, :] = y_pad 182 | 183 | # FREQUENCY DOMAIN 184 | 185 | # signal: 186 | Y = fft(y[0]) 187 | # YY = Y(1:round(end/2)+1,:); # Half window (exploit the symmetry) 188 | YY = Y[:, :(len_segment / 2 + 1)] # Half window (exploit the symmetry) 189 | YPhase = np.angle(YY) # Phase 190 | Y1 = np.abs(YY) # Spectrum 191 | Y2 = Y1 ** 2 # Power Spectrum 192 | 193 | # noise: 194 | Y_N = fft(y[1]) 195 | YY_N = Y_N[:, :(len_segment / 2 + 1)] # Half window (exploit the symmetry) 196 | Y_NPhase = np.angle(YY_N) # Phase 197 | Y_N1 = np.abs(YY_N) # Spectrum 198 | Y_N2 = Y_N1 ** 2 # Power Spectrum 199 | 200 | # The noise "estimate" is simply the average of the noise power 201 | # spectral density in the frame: 202 | P_N = Y_N2.mean(axis=-1) 203 | 204 | Y_hat = Y2 - factor * P_N[:, np.newaxis] # subtraction 205 | Y_hat = np.maximum(Y_hat, 0) # Make the minima equal zero 206 | PN_hat = Y_N2 - factor * P_N[:, np.newaxis] # subtraction for noise alone 207 | # PN_hat = np.maximum(PN_hat, 0) 208 | PN_hat[Y_hat == 0] = 0 209 | 210 | Y_hat[0:2, :] = 0 211 | PN_hat[0:2, :] = 0 212 | # Combining the estimated power spectrum with the original noisy phase, 213 | # and add the frames using an overlap-add technique 214 | output_Y = overlap_and_add(np.sqrt(Y_hat), YPhase, (w + padz), shift_p * w) 215 | output_N = overlap_and_add(np.sqrt(PN_hat.astype('complex')), 216 | Y_NPhase, (w + padz), shift_p * w) 217 | 218 | return output_Y, output_N 219 | 220 | 221 | def 
overlap_and_add(powers, phases, len_window, shift_size): 222 | """Reconstruct a signal with the overlap and add method. 223 | 224 | Parameters 225 | ---------- 226 | powers : ndarray 227 | Magnitude of the power spectrum of the signal to reconstruct. 228 | phases : ndarray 229 | Phase of the signal to reconstruct. 230 | len_window : int 231 | Frame length, in samples. 232 | shift_size : int 233 | Shift length. For non overlapping signals, in would equal `len_window`. 234 | For 50% overlapping signals, it would be `len_window/2`. 235 | 236 | Returns 237 | ------- 238 | ndarray 239 | Reconstructed time-domain signal. 240 | 241 | """ 242 | len_window = int(len_window) 243 | shift_size = int(shift_size) 244 | n_frames, len_frame = powers.shape 245 | spectrum = powers * np.exp(1j * phases) 246 | signal = np.zeros(n_frames * shift_size + len_window - shift_size) 247 | 248 | # Create full spectrum, by joining conjugated positive spectrum 249 | if len_window % 2: 250 | # Do no duplicate the DC bin 251 | spectrum = np.hstack((spectrum, np.conj(np.fliplr(spectrum[:, 1:])))) 252 | else: 253 | # If odd-numbered, do not duplicated the DC ans FS/2 bins 254 | spectrum = np.hstack((spectrum, 255 | np.conj(np.fliplr(spectrum[:, 1:-1])))) 256 | 257 | signal = np.zeros((n_frames - 1) * shift_size + len_window) 258 | 259 | for i_frame, hop in enumerate(range(0, 260 | len(signal) - int(len_window) + 1, 261 | int(shift_size))): 262 | signal[hop:hop + len_window] \ 263 | += np.real(ifft(spectrum[i_frame], len_window)) 264 | return signal 265 | 266 | 267 | class WestermannCrm(object): 268 | """Applies HRTF and BRIR for a given target and masker distance. 269 | 270 | Parameters 271 | ---------- 272 | fs : int 273 | Samping frequenc of the process. (Default value = 40000) 274 | 275 | Attributes 276 | ---------- 277 | brir : dict 278 | Binaural room impulse responses for each distance. 279 | delays : dict 280 | Delay until the first peak in the BRIR for each distance. 
281 | dist : ndarray 282 | List of the valid distances (0.5, 2, 5, and 10 meters). 283 | 284 | References 285 | ---------- 286 | .. [1] A. Westermann and J. M. Buchholz: Release from masking through 287 | spatial separation in distance in hearing impaired listeners. 288 | Proceedings of Meetings on Acoustics 19 (2013) 050156. 289 | """ 290 | 291 | def __init__(self, fs=40000): 292 | self.dist = np.asarray([0.5, 2, 5, 10]) 293 | self.fs = fs 294 | self.brir = self._load_brirs() 295 | self.delays = self._find_delay() 296 | 297 | def _load_brirs(self): 298 | """Loads BRIRs from file.""" 299 | brirs = {} 300 | for d in self.dist: 301 | fname = '../stimuli/crm/brirs_{fs}/aud{d_str}m.wav'.format( 302 | fs=self.fs, 303 | d_str=self._normalize_fname(d) 304 | ) 305 | wav = wavfile.read(fname) 306 | brirs[d] = np.array(wav[1].astype('float') / 2. ** 15).T 307 | return brirs 308 | 309 | def _find_delay(self): 310 | """Calculates the delay of the direct sound, in samples.""" 311 | delays = {} 312 | for k, v in six.iteritems(self.brir): 313 | x = np.mean(v, axis=0) 314 | delays[k] = np.abs(x).argmax() 315 | return delays 316 | 317 | @staticmethod 318 | def _normalize_fname(d): 319 | """ 320 | 321 | Parameters 322 | ---------- 323 | d : float 324 | 325 | Returns 326 | ------- 327 | 328 | """ 329 | if d > 1: 330 | d_str = str('%d' % d) 331 | else: 332 | d_str = str(d).replace('.', '') 333 | return d_str 334 | 335 | def _load_eqfilt(self, tdist, mdist): 336 | """ 337 | Returns the equalization filter for the pair of target and masker. 338 | 339 | Parameters 340 | ---------- 341 | tdist : float 342 | Target distance in meters. Must be in the set (0.5, 2, 5, 10). 343 | mdist : 344 | Masker distance in meters. Must be in the set (0.5, 2, 5, 10). 345 | 346 | Returns 347 | ------- 348 | ndarray 349 | Equalization filter. 
350 | 351 | """ 352 | eqfilt_name = 't{}m_m{}m.mat'.format(self._normalize_fname(tdist), 353 | self._normalize_fname(mdist)) 354 | eqfilt_path = '../stimuli/crm/eqfilts_{}/{}'.format(self.fs, 355 | eqfilt_name) 356 | try: 357 | eqfilt = sp.io.loadmat(eqfilt_path, squeeze_me=True) 358 | except IOError: 359 | raise IOError('Cannot file file %s' % eqfilt_path) 360 | return eqfilt 361 | 362 | def apply(self, x, m, tdist, mdist, align=True): 363 | """Applies the "Westermann" distortion to a target and masker. 364 | 365 | target and masker are not co-located, the masker is equalized before 366 | applying the BRIR, so that both the target and masker will have the 367 | same average spectrum after the BRIR filtering. 368 | 369 | By default, the delay introduced by the BRIR is compensated for, 370 | such that the maxiumum of the BRIR happen simulatenously. 371 | 372 | Parameters 373 | ---------- 374 | x : ndarray 375 | Mono clean speech signal of length `N`. 376 | m : ndarray 377 | Mono masker signal of length `N`. 378 | tdist : float 379 | Target distance, in meters. 380 | mdist : float 381 | Masker distance, in meters. 382 | align : bool 383 | Compensate for the delay in the BRIRs with distance (default is 384 | `True`). 385 | 386 | Returns 387 | ------- 388 | mix : (2, N) ndarray 389 | Mixture processesed by the BRIRs. 390 | noise : (2, N) 391 | Noise alone processed by the BRIRs. 
def noise_from_signal(x, fs=40000, keep_env=False):
    """Create a noise with same spectrum as the input signal.

    The noise is obtained by randomizing the phase of the signal's
    spectrum, which preserves the long-term magnitude spectrum while
    destroying the temporal structure.

    Parameters
    ----------
    x : array_like
        Input signal.
    fs : int
        Sampling frequency of the input signal. (Default value = 40000)
    keep_env : bool
        Apply the envelope of the original signal to the noise. (Default
        value = False)

    Returns
    -------
    ndarray
        Noise signal.

    """
    x = np.asarray(x)
    n_x = x.shape[-1]
    n_fft = utils.next_pow_2(n_x)
    # `n_fft` is already a power of two; the original applied `next_pow_2`
    # to it a second time, which was redundant.
    X = rfft(x, n_fft)
    # Randomize the phase while keeping the magnitude spectrum.
    noise_mag = np.abs(X) * np.exp(
        2 * np.pi * 1j * np.random.random(X.shape[-1]))
    noise = np.real(irfft(noise_mag, n_fft))
    # Trim the zero-padding back to the original length.
    out = noise[:n_x]

    if keep_env:
        # Extract the envelope with the Hilbert transform, smooth it with a
        # 50-Hz low-pass filter, then impose it on the noise.
        env = np.abs(hilbert(x))
        bb, aa = sp.signal.butter(6, 50 / (fs / 2))  # 50 Hz LP filter
        env = sp.signal.filtfilt(bb, aa, env)
        out *= env

    return out
4 | """ 5 | from __future__ import absolute_import, division, print_function 6 | 7 | import numpy as np 8 | from numpy import exp, sin, cos, sqrt, abs, ones, pi 9 | import scipy as sp 10 | import scipy.signal as ss 11 | 12 | from .utils import next_pow_2, hilbert 13 | 14 | 15 | try: 16 | _ = np.use_fastnumpy # Use Enthought MKL optimizations 17 | from numpy.fft import fft, ifft, rfft, irfft 18 | except AttributeError: 19 | try: 20 | import mklfft # MKL FFT optimizations from Continuum Analytics 21 | from numpy.fft import fft, ifft, rfft, irfft 22 | except ImportError: 23 | # Finally, just use Numpy's and Scipy's 24 | from scipy.fftpack import fft, ifft 25 | from numpy.fft import rfft, irfft 26 | 27 | 28 | CENTER_F = np.asarray([63, 80, 100, 125, 160, 200, 250, 315, 400, 500, 29 | 630, 800, 1000, 1250, 1600, 2000, 2500, 3150, 4000, 30 | 5000, 6300, 8000]) 31 | FS = np.asarray([22050.]) 32 | 33 | 34 | def erb_bandwidth(fc): 35 | """Bandwitdh of an Equivalent Rectangular Bandwidth (ERB). 36 | 37 | Parameters 38 | ---------- 39 | fc : ndarray 40 | Center frequency, or center frequencies, of the filter. 41 | 42 | Returns 43 | ------- 44 | ndarray or float 45 | Equivalent rectangular bandwidth of the filter(s). 46 | """ 47 | # In Hz, according to Glasberg and Moore (1990) 48 | return 24.7 + fc / 9.265 49 | 50 | 51 | def lowpass_env_filtering(x, cutoff=150., n=1, fs=22050): 52 | """Low-pass filters a signal using a Butterworth filter. 53 | 54 | Parameters 55 | ---------- 56 | x : ndarray 57 | cutoff : float, optional 58 | Cut-off frequency of the low-pass filter, in Hz. The default is 150 Hz. 59 | n : int, optional 60 | Order of the low-pass filter. The default is 1. 61 | fs : float, optional 62 | Sampling frequency of the signal to filter. The default is 22050 Hz. 63 | 64 | Returns 65 | ------- 66 | ndarray 67 | Low-pass filtered signal. 68 | 69 | """ 70 | 71 | b, a = sp.signal.butter(N=n, Wn=cutoff * 2. 
class GammatoneFilterbank(object):
    """Fourth-order gammatone filterbank.

    The filters are implemented as a cascade of four second-order sections.
    The coefficient formulas match Slaney's implementation of the
    Patterson-Holdsworth auditory filterbank -- presumably ported from
    Slaney's MATLAB ``MakeERBFilters``; confirm against [Slaney1993]_.
    """

    def __init__(self, fs, cf, b=1.019, order=1, q=9.26449, min_bw=24.7):
        """Gammatone Filterbank

        Parameters
        ----------
        fs : float
            Sampling frequency of the signals to filter.
        cf : array_like
            Center frequencies of the filterbank.
        b : float
            beta of the gammatone filters. The default is `b` = 1.019.
        order : int
            Order of the ERB relationship. The default value is 1.
        q : float
            Q-value of the ERB. The default value is 9.26449.
        min_bw : float
            Minimum bandwidth of an ERB.

        References
        ----------
        .. [Slaney1993] M. Slaney, "An Efficient Implementation of the
           Patterson-Holdsworth Auditory Filter Bank", Apple Computer
           Technical Report #35, 1993.
        """

        self.fs = fs
        # Accept a scalar center frequency as well as a sequence.
        try:
            len(cf)
        except TypeError:
            cf = [cf]
        self.cf = np.asarray(cf)
        self.b = b
        self.erb_order = order
        self.q = q
        self.min_bw = min_bw


    def _calculate_coefficients(self):
        """Compute the per-channel filter coefficients.

        Returns
        -------
        tuple of ndarrays
            ``(a0, a11, a12, a13, a14, a2, b0, b1, b2, gain)``: the ``a``
            values are the numerator coefficients of the four cascaded
            second-order sections (one ``a1x`` per section), the ``b``
            values are the denominator coefficients shared by all four
            sections, and ``gain`` is the overall normalization factor.
        """
        cf = self.cf
        b = self.b
        order = self.erb_order
        q = self.q
        min_bw = self.min_bw

        # ERB of each channel (Glasberg & Moore formula when order == 1).
        erb = ((cf / q) ** order + min_bw ** order) ** (1 / order)
        t = 1 / self.fs
        # Bandwidth parameter, in rad/s.
        b = b * 2 * pi * erb
        a0 = t
        a2 = 0
        b0 = 1
        # Shared pole pair of every second-order section.
        b1 = -2 * cos(2 * cf * pi * t) / exp(b * t)
        b2 = exp(-2 * b * t)
        # Zeros of the four sections.
        a11 = -(2 * t * cos(2 * cf * pi * t) / exp(b * t) + 2 * sqrt(
            3 + 2 ** 1.5) * t * sin(2 * cf * pi * t) / exp(b * t)) / 2
        a12 = -(2 * t * cos(2 * cf * pi * t) / exp(b * t) - 2 * sqrt(
            3 + 2 ** 1.5) * t * sin(2 * cf * pi * t) / exp(b * t)) / 2
        a13 = -(2 * t * cos(2 * cf * pi * t) / exp(b * t) + 2 * sqrt(
            3 - 2 ** 1.5) * t * sin(2 * cf * pi * t) / exp(b * t)) / 2
        a14 = -(2 * t * cos(2 * cf * pi * t) / exp(b * t) - 2 * sqrt(
            3 - 2 ** 1.5) * t * sin(2 * cf * pi * t) / exp(b * t)) / 2
        i = 1j
        # Overall gain that normalizes the filter to unity at the center
        # frequency.
        gain = abs((-2 * exp(4 * i * cf * pi * t) * t +
                    2 * exp(-(b * t) + 2 * i * cf * pi * t) * t *
                    (cos(2 * cf * pi * t) - sqrt(3 - 2 ** (3. / 2)) *
                     sin(2 * cf * pi * t))) *
                   (-2 * exp(4 * i * cf * pi * t) * t +
                    2 * exp(-(b * t) + 2 * i * cf * pi * t) * t *
                    (cos(2 * cf * pi * t) + sqrt(3 - 2 ** (3. / 2)) *
                     sin(2 * cf * pi * t))) *
                   (-2 * exp(4 * i * cf * pi * t) * t +
                    2 * exp(-(b * t) + 2 * i * cf * pi * t) * t *
                    (cos(2 * cf * pi * t) -
                     sqrt(3 + 2 ** (3. / 2)) * sin(2 * cf * pi * t))) *
                   (-2 * exp(4 * i * cf * pi * t) * t + 2 * exp(
                       -(b * t) + 2 * i * cf * pi * t) * t *
                    (cos(2 * cf * pi * t) + sqrt(3 + 2 ** (3. / 2)) * sin(
                        2 * cf * pi * t))) /
                   (-2 / exp(2 * b * t) - 2 * exp(4 * i * cf * pi * t) +
                    2 * (1 + exp(4 * i * cf * pi * t)) / exp(b * t)) ** 4)
        allfilts = ones(len(cf))
        return a0 * allfilts, a11, a12, a13, a14, a2 * allfilts, \
            b0 * allfilts, b1, b2, gain

    def filter(self, x):
        """Filters a signal along its last dimension.

        Parameters
        ----------
        x : ndarray
            Signal to filter.

        Returns
        -------
        ndarray
            Filtered signals with shape ``(M, N)``, where ``M`` is the number
            of channels, and ``N`` is the input signal's number of samples.
        """

        a0, a11, a12, a13, a14, a2, b0, b1, b2, gain = self._calculate_coefficients()

        output = np.zeros((gain.shape[0], x.shape[-1]))
        for chan in range(gain.shape[0]):
            # Cascade of four second-order sections; the overall gain is
            # folded into the first section.
            y1 = ss.lfilter([a0[chan] / gain[chan], a11[chan] / gain[chan],
                             a2[chan] / gain[chan]],
                            [b0[chan], b1[chan], b2[chan]], x)
            y2 = ss.lfilter([a0[chan], a12[chan], a2[chan]],
                            [b0[chan], b1[chan], b2[chan]], y1)
            y3 = ss.lfilter([a0[chan], a13[chan], a2[chan]],
                            [b0[chan], b1[chan], b2[chan]], y2)
            y4 = ss.lfilter([a0[chan], a14[chan], a2[chan]],
                            [b0[chan], b1[chan], b2[chan]], y3)
            output[chan, :] = y4

        return output
218 | 219 | """ 220 | self.fs = fs 221 | self.center_f = center_f 222 | self.width = width 223 | self.output_time = output_time 224 | 225 | def filter(self, x): 226 | """ 227 | Parameters 228 | ---------- 229 | x : array_like 230 | Input signal 231 | 232 | Returns 233 | ------- 234 | 235 | Notes 236 | ----- 237 | 238 | This method uses Numpy's FFT because it returns 1 complex result per 239 | frequency bin, instead of Scipy's rfft, which returns 2 real results 240 | per frequency bin. 241 | """ 242 | 243 | center_f = np.asarray(self.center_f, dtype='float') 244 | 245 | n = len(x) 246 | # TODO Use powers of 2 to calculate the power spectrum, and also, possibly 247 | # use RFFT instead of the complete fft. 248 | X = rfft(x) 249 | X_pow = np.abs(X) ** 2 / n # Power spectrum 250 | X_pow[1:] = X_pow[1:] * 2. 251 | bound_f = np.zeros(len(center_f) + 1) 252 | bound_f[0] = center_f[0] * 2. ** (- 1. / (2. * self.width)) 253 | bound_f[1:] = center_f * 2. ** (1. / (2. * self.width)) 254 | bound_f = bound_f[bound_f < self.fs / 2] 255 | # Convert from frequencies to vector indexes. Factor of two is because 256 | # we consider positive frequencies only. 257 | bound_idx = np.floor(bound_f / (self.fs / 2.) * len(X_pow)).astype('int') 258 | # Initialize arrays 259 | out_rms = np.zeros(len(center_f)) 260 | out_time = np.zeros((len(center_f), x.shape[-1]), dtype='complex') 261 | for idx, (l, f) in enumerate(zip(bound_idx[0:], bound_idx[1:])): 262 | out_time[idx, l:f] = X[l:f] 263 | out_rms[idx] = np.sqrt(np.sum(X_pow[l:f]) / n) 264 | if self.output_time: 265 | out_time = np.real(irfft(out_time, n=n, axis=-1)) 266 | return out_rms, out_time 267 | else: 268 | return out_rms 269 | 270 | 271 | def hilbert_envelope(signal): 272 | """Calculates the Hilbert envelope of a signal. 273 | 274 | Parameters 275 | ---------- 276 | signal : array_like 277 | Signal on which to calculate the hilbert envelope. The calculation 278 | is done along the last axis (i.e. ``axis=-1``). 
279 | 280 | Returns 281 | ------- 282 | ndarray 283 | 284 | """ 285 | signal = np.asarray(signal) 286 | N_orig = signal.shape[-1] 287 | # Next power of 2. 288 | N = next_pow_2(N_orig) 289 | y_h = hilbert(signal, N) 290 | # Return signal with same shape as original 291 | return np.abs(y_h[..., :N_orig]) 292 | -------------------------------------------------------------------------------- /pambox/middle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :mod:`pambox.periph.middle` regroups processes of the middle ear. 4 | """ 5 | from __future__ import absolute_import, division, print_function 6 | 7 | -------------------------------------------------------------------------------- /pambox/outer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :mod:`pambox.periph.outer` regroups processes of the outer ear. 4 | """ 5 | from __future__ import absolute_import, division, print_function 6 | 7 | -------------------------------------------------------------------------------- /pambox/speech/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`pambox.speech` module gather speech intelligibility 3 | models, a framework to run intelligibility experiments, as well as a wrapper 4 | around speech materials. 
    def __init__(self,
                 fs=22050,
                 name='BinauralMrSepsm',
                 cf=MrSepsm._default_center_cf,
                 modf=MrSepsm._default_modf,
                 downsamp_factor=10,
                 noise_floor=0.001,
                 snr_env_limit=0.001,
                 sigma_e=0.25,
                 sigma_d=105e-6,
                 fast_cancel=True,
                 debug=False,
                 win_len=0.02,
                 ec_padding_windows=10
                 ):
        """Initialize the binaural sEPSM model.

        Parameters
        ----------
        fs : int, optional
            Sampling frequency of the input signals, in Hz. Default is 22050.
        name : str, optional
            Name of the model instance. Default is 'BinauralMrSepsm'.
        cf : array_like, optional
            Center frequencies of the peripheral filterbank. Default is
            ``MrSepsm._default_center_cf``.
        modf : array_like, optional
            Center frequencies of the modulation filterbank. Default is
            ``MrSepsm._default_modf``.
        downsamp_factor : int, optional
            Downsampling factor passed to the mr-sEPSM. Default is 10.
        noise_floor : float, optional
            Noise floor passed to the mr-sEPSM. Default is 0.001.
        snr_env_limit : float, optional
            Lower limit of the SNRenv values. Default is 0.001.
        sigma_e : float, optional
            Passed to the EC process as its `sigma_e` parameter (presumably
            the amplitude-jitter standard deviation — confirm against
            `central.EC`). Default is 0.25.
        sigma_d : float, optional
            Passed to the EC process as its `sigma_d` parameter (presumably
            the time-jitter standard deviation, in seconds — confirm against
            `central.EC`). Default is 105e-6.
        fast_cancel : bool, optional
            Use the fast implementation of the EC cancellation. Default is
            True.
        debug : bool, optional
            If True, `predict` stores additional internal values in its
            result dictionary. Default is False.
        win_len : float, optional
            Duration of the EC analysis windows, in seconds. Default is 0.02.
        ec_padding_windows : int, optional
            Number of windows used to pad the signals in the EC process.
            Default is 10.
        """
        MrSepsm.__init__(self, fs, cf, modf, downsamp_factor, noise_floor,
                         snr_env_limit, output_time_signals=True)
        self.name = name
        # EC analysis windows overlap by 50%.
        self.overlap = 0.5
        self.win_len = win_len
        self.sigma_e = sigma_e
        self.sigma_d = sigma_d
        self.ec_padding_windows = ec_padding_windows
        self.fast_cancel = fast_cancel
        # Keys used to store the EC gains and delays in debug results.
        self._key_alpha = 'alpha'
        self._key_tau = 'tau'
        self.debug = debug
        self.env_lp_cutoff = 770  # Hz, from breebaart2001binaurala
        self.env_lp_order = 5  # from breebaart2001binaurala
        self._ec_process = EC(fs, win_len=self.win_len, overlap=self.overlap,
                              sigma_d=self.sigma_d, sigma_e=self.sigma_e,
                              fast_cancel=fast_cancel,
                              padding_windows=self.ec_padding_windows)
100 | 101 | """ 102 | left_mix_envs = left['chan_envs'][-2] 103 | right_mix_envs = right['chan_envs'][-2] 104 | left_noise_envs = left['chan_envs'][-1] 105 | right_noise_envs = right['chan_envs'][-1] 106 | 107 | # ... then we find the alpha and tau parameters the minimize the noise 108 | # energy... 109 | alphas, taus = self._ec_equalize(left_noise_envs, right_noise_envs) 110 | # ... then we perform the cancellation with those alphas and taus for 111 | # the mixture and noise... 112 | cancelled_mix = self._ec_cancel(left_mix_envs, right_mix_envs, alphas, 113 | taus) 114 | cancelled_noise = self._ec_cancel(left_noise_envs, right_noise_envs, 115 | alphas, taus) 116 | 117 | # ... then we apply the same processing as the mr-sEPSM, until we 118 | # have the multi-resolution excitation patterns... 119 | mix_mr_exc_ptns = self._apply_sepsm_processing(cancelled_mix[ 120 | np.newaxis]) 121 | noise_mr_exc_ptns = self._apply_sepsm_processing(cancelled_noise[ 122 | np.newaxis]) 123 | 124 | # --- BU SNRenv --- 125 | # ... we can finally calculate the BU SNRenv by calculating the 126 | # SNRenv. 127 | bu_mr_snr_env_matrix, _ = self._snr_env( 128 | mix_mr_exc_ptns, 129 | noise_mr_exc_ptns 130 | ) 131 | return bu_mr_snr_env_matrix, alphas, taus, mix_mr_exc_ptns, noise_mr_exc_ptns 132 | 133 | def _apply_sepsm_processing(self, envs): 134 | filtered_envs, _ = self._mod_filtering(envs) 135 | mr_exc_ptns = self._mr_env_powers(envs, filtered_envs) 136 | return mr_exc_ptns[0] 137 | 138 | def _apply_be_process(self, left, right): 139 | # --- Better ear (BE) --- 140 | be_mr_snr_env_matrix = self._better_ear( 141 | left['mr_snr_env_matrix'], 142 | right['mr_snr_env_matrix'], 143 | left['bands_above_thres_idx'], 144 | right['bands_above_thres_idx'] 145 | ) 146 | return be_mr_snr_env_matrix 147 | 148 | def _better_ear(self, left, right, left_idx, right_idx): 149 | """Return the better-ear SNRenv for bands above threshold only. 
150 | 151 | Parameters 152 | ---------- 153 | left, right: ndarray 154 | SNR_env values, of shape (N_CHAN, N_WIN). 155 | left_idx, right_idx: array_like 156 | Index of the bands above threshold for the left and right ear, 157 | respectively. 158 | 159 | """ 160 | left_idx = np.asarray(left_idx) 161 | right_idx = np.asarray(right_idx) 162 | 163 | be_snr_env = np.zeros_like(left) 164 | for side, idx in zip((left, right), (left_idx, right_idx)): 165 | try: 166 | be_snr_env[idx] = np.maximum(be_snr_env[idx], side[idx]) 167 | except IndexError: 168 | # BE SNRenv is not modified. 169 | pass 170 | return be_snr_env 171 | 172 | def _calc_bu_bands_above_thres(self, left, right): 173 | """Calculate bands above threshold for binaural unmasking. 174 | 175 | A band is considered above threshold if both bands are above 176 | threshold (logical 'and'). 177 | 178 | Parameters 179 | ---------- 180 | left, right : dictionaries 181 | Outputs from the mr-sEPSM prediction. Must have a 182 | 'bands_above_thres_idx' key. 183 | 184 | Returns 185 | ------- 186 | idx : array 187 | Indices of the bands that are above threshold in at least one side. 188 | 189 | """ 190 | 191 | left_bands_idx = left["bands_above_thres_idx"] 192 | right_bands_idx = right["bands_above_thres_idx"] 193 | 194 | # BU mask is when _both sides_ are above threshold. 195 | indices = list(set(left_bands_idx) & set(right_bands_idx)) 196 | return indices 197 | 198 | def _calc_be_bands_above_thres(self, left, right): 199 | """True if at least one side is above threshold. 200 | 201 | Parameters 202 | ---------- 203 | left, right : dictionaries 204 | Outputs from the mr-sEPSM prediction. Must have a 205 | 'bands_above_thres_idx' key. 206 | 207 | Returns 208 | ------- 209 | idx : array 210 | Indices of the bands that are above threshold in at least one side. 
211 | """ 212 | 213 | left_bands_idx = left["bands_above_thres_idx"] 214 | right_bands_idx = right["bands_above_thres_idx"] 215 | 216 | indices = list(set(left_bands_idx) | set(right_bands_idx)) 217 | return indices 218 | 219 | def _apply_ba_process(self, be, bu, be_indices, bu_indices): 220 | """Applies the binaural-advantage process. 221 | 222 | The BA advantage selection is actually the exact same thing as the BE 223 | process: only bands above threshold for *that* signal are 224 | considered for the comparison. 225 | 226 | Parameters 227 | ---------- 228 | be, bu : ndarray 229 | Better-ear and Binaural-unmasking SNRenv. 230 | be_indices, bu_indices : lists of integers 231 | List of the indices for the channels that were above threshold 232 | for each input. 233 | 234 | Returns 235 | ------- 236 | ba : ndarray 237 | Combination of the better-ear and binaural unmasking SNRenv. 238 | 239 | """ 240 | 241 | ba = self._better_ear(be, bu, be_indices, bu_indices) 242 | return ba 243 | 244 | def predict(self, clean=None, mixture=None, noise=None): 245 | """Predict intelligibility. 246 | 247 | Parameters 248 | ---------- 249 | clean, mixture, noise : ndarray 250 | Binaural input signals. 251 | 252 | Returns 253 | ------- 254 | res : dict 255 | Model predictions and internal values. Model predictions are 256 | stored as a dictionary under the key `'p'`. 257 | 258 | """ 259 | # Calculate the mr-sEPSM prediction for each ear in one call.. 260 | binaural_res = [super(BsEPSM, self).predict(clean=c, mix=m, noise=n) 261 | for c, m, n in zip(clean, mixture, noise)] 262 | # ... and save them independently... 263 | ears_res = Ears(*binaural_res) 264 | log.debug('Left bands above threshold {}.'.format( 265 | ears_res.left["bands_above_thres_idx"])) 266 | log.debug('Right bands above threshold {}.'.format(ears_res.right[ 267 | "bands_above_thres_idx"])) 268 | 269 | # ... 
then apply the binaural unmasking (BU) process, which includes the 270 | # EC process and the mr-sEPSM process applied to the cancelled 271 | # signals... 272 | bu_mr_snr_env_matrix, alphas, taus, bu_mix_mr_exc_ptns, \ 273 | bu_noise_mr_exc_ptns \ 274 | = self._apply_bu_process(ears_res.left, ears_res.right) 275 | 276 | # ... in "parallel", we apply the better-ear (BE) process to the 277 | # multi-resolution SNRenv... 278 | be_mr_snr_env_matrix = self._apply_be_process(ears_res.left, 279 | ears_res.right) 280 | 281 | # ... then we select the bands that are considered "above threshold" 282 | # for the BU, BE and binaural advantage (BA)... 283 | bu_idx_above_thres = self._calc_bu_bands_above_thres(ears_res.left, 284 | ears_res.right) 285 | log.debug('BU bands above threshold {}.'.format(bu_idx_above_thres)) 286 | be_idx_above_thres = self._calc_be_bands_above_thres(ears_res.left, 287 | ears_res.right) 288 | log.debug('BE bands above threshold {}.'.format(be_idx_above_thres)) 289 | ba_idx_above_thres = list( 290 | set(be_idx_above_thres) | set(bu_idx_above_thres)) 291 | log.debug('BA bands above threshold {}.'.format(ba_idx_above_thres)) 292 | 293 | 294 | # ... then we combine the BE and BU as part of the "binaural 295 | # advantage"... 296 | ba_mr_snr_env_matrix = self._apply_ba_process( 297 | be_mr_snr_env_matrix, 298 | bu_mr_snr_env_matrix, 299 | be_idx_above_thres, 300 | bu_idx_above_thres) 301 | 302 | # ... we can now averaging over time the multi-resolution 303 | # representation... 304 | time_av_bu_snr_env = self._time_average(bu_mr_snr_env_matrix) 305 | time_av_be_snr_env = self._time_average(be_mr_snr_env_matrix) 306 | time_av_ba_snr_env = self._time_average(ba_mr_snr_env_matrix) 307 | 308 | # ... and combine the SNRenv for the bands that are above threshold 309 | # for each output type. 
310 | bu_snr_env = self._optimal_combination( 311 | time_av_bu_snr_env, 312 | bu_idx_above_thres) 313 | be_snr_env = self._optimal_combination( 314 | time_av_be_snr_env, 315 | be_idx_above_thres) 316 | ba_snr_env = self._optimal_combination( 317 | time_av_ba_snr_env, 318 | ba_idx_above_thres) 319 | 320 | # Additional variation, where the multi-resolution representation is 321 | # not average over time at first. The whole mr representation is 322 | # combined optimally. 323 | full_bu_snr_env = self._optimal_combination( 324 | bu_mr_snr_env_matrix, 325 | bu_idx_above_thres 326 | ) 327 | full_be_snr_env = self._optimal_combination( 328 | be_mr_snr_env_matrix, 329 | be_idx_above_thres 330 | ) 331 | full_ba_snr_env = self._optimal_combination( 332 | ba_mr_snr_env_matrix, 333 | ba_idx_above_thres 334 | ) 335 | 336 | res = { 337 | 'p': { 338 | 'be_snr_env': be_snr_env, 339 | 'bu_snr_env': bu_snr_env, 340 | 'ba_snr_env': ba_snr_env, 341 | 'full_be_snr_env': full_be_snr_env, 342 | 'full_bu_snr_env': full_bu_snr_env, 343 | 'full_ba_snr_env': full_ba_snr_env, 344 | 'snr_env_l': ears_res.left['p']['snr_env'], 345 | 'snr_env_r': ears_res.right['p']['snr_env'] 346 | }, 347 | } 348 | if self.debug: 349 | res.update({ 350 | 'be_matrix': be_mr_snr_env_matrix, 351 | 'bu_matrix': bu_mr_snr_env_matrix, 352 | 'ba_matrix': ba_mr_snr_env_matrix, 353 | 'be_idx_above_threshold': be_idx_above_thres, 354 | 'bu_idx_above_threshold': bu_idx_above_thres, 355 | 'ba_idx_above_threshold': ba_idx_above_thres, 356 | 'ears': ears_res, 357 | 'time_av_be_snr_env': time_av_be_snr_env, 358 | 'time_av_ba_snr_env': time_av_ba_snr_env, 359 | 'time_av_bu_snr_env': time_av_bu_snr_env, 360 | 'bu_mix_mr_exc_ptns': bu_mix_mr_exc_ptns, 361 | 'bu_noise_mr_exc_ptns': bu_noise_mr_exc_ptns, 362 | self._key_alpha: alphas, 363 | self._key_tau: taus 364 | }) 365 | return res # Results for each ear's sEPSM model. 
366 | 367 | def plot_alpha(self, res): 368 | alphas = res[self._key_alpha] 369 | t = np.arange(alphas.shape[-1]) * self.overlap * self.win_len 370 | plot(t, alphas.T) 371 | xlabel('Time (sec)') 372 | ylabel('$alpha_0$ gains (L / R)') 373 | legend(self.cf, 374 | loc='outside', 375 | bbox_to_anchor=(1.05, 1)) 376 | 377 | def plot_alpha_hist(self, res, ymax=None): 378 | alphas = res[self._key_alpha] 379 | plt.boxplot(alphas.T, labels=self.cf) 380 | plt.setp(plt.xticks()[1], rotation=30) 381 | xlabel('Channel frequency (Hz)') 382 | ylabel(r'$\alpha_0$ gains (L / R)') 383 | plt.ylim([0, ymax]) 384 | plt.xticks(rotation=30) 385 | 386 | def plot_tau(self, res): 387 | tau = res[self._key_tau] 388 | t = np.arange(tau.shape[-1]) * self.overlap * self.win_len 389 | plot(t, tau.T) 390 | xlabel('Time (sec)') 391 | ylabel('Tau (s)') 392 | legend(self.cf, 393 | loc='outside', 394 | bbox_to_anchor=(1.05, 1)) 395 | 396 | def plot_tau_hist(self, res, cfs=None, bins=None, return_ax=False): 397 | """Plot histogram of tau values. 398 | 399 | Parameters 400 | ---------- 401 | res : dict 402 | Results from the `predict` function. 403 | cfs : list 404 | Index of center frequencies to plot. 405 | bins : int 406 | Number of bins in the histogram. If `None`, uses bins between 407 | -700 us and 700 us. Default is `None`. 408 | return_ax : bool, optional 409 | If True, returns the figure Axes. Default is False. 410 | """ 411 | taus = res[self._key_tau] 412 | 413 | edges = np.max(np.abs(taus)) 414 | 415 | if bins is None: 416 | bins = np.arange(-edges, edges, 20e-6) 417 | 418 | # Put together all ITDs if no particular channel is chosen... 419 | if cfs is None: 420 | fig, ax = plt.subplots(1, 1) 421 | ax.hist(taus.ravel(), bins=bins) 422 | ax.set_ylabel('Count for all channels') 423 | else: 424 | # ... or create N subplots if more than one channel is chosen. 
425 | try: 426 | iter(cfs) 427 | except TypeError: 428 | cfs = (cfs, ) 429 | cfs = cfs[::-1] 430 | fig, axes = plt.subplots(len(cfs), 1, sharex=True, sharey=True) 431 | try: 432 | iter(axes) 433 | except TypeError: 434 | axes = (axes,) 435 | for ax, cf in zip(axes, cfs): 436 | ax.hist(taus[cf], bins=bins) 437 | [ax.set_ylabel('@ {} Hz'.format(self.cf[i_cf])) 438 | for ax, i_cf in zip(axes, cfs)] 439 | 440 | ax.set_xlabel('Interaural delay (ms)') 441 | # ax.set_xlim((-800e-6, 800e-6)) 442 | ticks = ax.get_xticks() 443 | ax.set_xticklabels(ticks * 1e3) 444 | if return_ax and cfs is None: 445 | return axes 446 | else: 447 | return ax 448 | 449 | def _extract_env(self, channel_sigs): 450 | """Extracts the envelope via half-wave rectification and low-pass 451 | filtering and jitters the envelopes. 452 | 453 | Parameters 454 | ---------- 455 | channel_sigs : ndarray 456 | Peripheral subband signals. 457 | 458 | Returns 459 | ------- 460 | env : ndarray 461 | 462 | """ 463 | envelopes = np.maximum(channel_sigs, 0) 464 | b, a = butter(self.env_lp_order, self.env_lp_cutoff * 2. / self.fs) 465 | envelopes = filtfilt(b, a, envelopes) 466 | 467 | epsilons, deltas = self._ec_process.create_jitter(envelopes[0]) 468 | for i_sig, signal in enumerate(envelopes): 469 | envelopes[i_sig] = self._ec_process.apply_jitter(signal, epsilons, deltas) 470 | return envelopes 471 | 472 | def _mod_sensitivity(self, envs): 473 | """Doesn't do anything to the envelopes""" 474 | return envs -------------------------------------------------------------------------------- /pambox/speech/material.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | The :mod:`pambox.speech.material` module gathers classes to facilitate 4 | working with different speech materials. 
5 | """ 6 | from __future__ import absolute_import, division, print_function 7 | import glob 8 | import logging 9 | import os 10 | import random 11 | 12 | import numpy as np 13 | import scipy.io.wavfile 14 | from six.moves import zip, range 15 | 16 | from pambox import utils 17 | 18 | log = logging.getLogger(__name__) 19 | 20 | 21 | class Material(object): 22 | """Load and manipulate speech materials for intelligibility experiments""" 23 | 24 | def __init__(self, 25 | fs=22050, 26 | path_to_sentences='../stimuli/clue/sentencesWAV22', 27 | path_to_maskers=None, 28 | path_to_ssn='../stimuli/clue/SSN_CLUE22.wav', 29 | ref_level=74, 30 | name='CLUE', 31 | force_mono=False): 32 | """ 33 | 34 | """ 35 | self.fs = fs 36 | self.path_to_sentences = path_to_sentences 37 | self.path_to_maskers = path_to_maskers 38 | self.ref_level = ref_level 39 | self.name = name 40 | self._ssn = None 41 | self._path_to_ssn = None 42 | self.path_to_ssn = path_to_ssn 43 | self.force_mono = force_mono 44 | self._files = None 45 | self._audio_ext = '.wav' 46 | 47 | @property 48 | def files(self): 49 | if not self._files: 50 | self._files = self.files_list() 51 | return self._files 52 | 53 | @files.setter 54 | def files(self, f): 55 | self._files = f 56 | 57 | @property 58 | def path_to_ssn(self): 59 | return self._path_to_ssn 60 | 61 | @path_to_ssn.setter 62 | def path_to_ssn(self, path): 63 | if path: 64 | self._path_to_ssn = path 65 | self._ssn = self._load_ssn() 66 | 67 | def load_file(self, filename): 68 | """Read a speech file by name. 69 | 70 | Parameters 71 | ---------- 72 | filename : string 73 | Name of the file to read. The file just be in the directory 74 | defined by `root_path` and `path_to_sentences`. 75 | 76 | Returns 77 | ------- 78 | ndarray 79 | Wav file read from disk, as floating point array. 
80 | """ 81 | path = os.path.join(self.path_to_sentences, filename) 82 | log.info('Reading file %s', path) 83 | _, int_sentence = scipy.io.wavfile.read(path) 84 | sent = int_sentence.T / np.iinfo(int_sentence.dtype).min 85 | if self.force_mono and sent.ndim == 2: 86 | return sent[1] 87 | else: 88 | return sent 89 | 90 | def files_list(self): 91 | """Return a list of all the .wav files in the `path_to_sentences` 92 | directory. 93 | 94 | Returns 95 | ------- 96 | files : list 97 | List of all files. 98 | """ 99 | path = os.path.join(self.path_to_sentences, '') 100 | log.info("Listing files from directory: %s", path) 101 | return [os.path.basename(each) for each in glob.glob(path + '*' + 102 | self._audio_ext)] 103 | 104 | def load_files(self, n=None): 105 | """Read files from disk, starting from the first one. 106 | 107 | Parameters 108 | ---------- 109 | n : int, optional 110 | Number of files to read. Default (`None`) is to read all files. 111 | 112 | Returns 113 | ------- 114 | generator 115 | Generator where each item is an `ndarray` of the file loaded. 116 | """ 117 | if not n: 118 | n = len(self.files) 119 | 120 | for _, name in zip(list(range(n)), self.files): 121 | yield self.load_file(name) 122 | 123 | def _load_ssn(self): 124 | try: 125 | filepath = self.path_to_ssn 126 | _, int_sentence = scipy.io.wavfile.read(filepath) 127 | ssn = int_sentence.T / np.iinfo(int_sentence.dtype).min 128 | except IOError: 129 | raise IOError('File not found: %s' % filepath) 130 | return ssn 131 | 132 | @staticmethod 133 | def pick_section(signal, section=None): 134 | """Pick section of signal 135 | 136 | Parameters 137 | ---------- 138 | section : int or ndarray, optional 139 | If an integer is given, returns section of length `n` 140 | Alternatively, if `section` is an ndarray the signal returned 141 | will be of the same length as the `section` signal. If `x` is 142 | `None`, the full signal is returned. 
143 | Returns 144 | ------- 145 | ndarray 146 | Speech-shaped noise signal. 147 | """ 148 | len_noise = signal.shape[-1] 149 | if section is None: 150 | len_sig = len_noise 151 | ii = 0 152 | elif isinstance(section, int): 153 | len_sig = section 154 | ii = np.random.randint(0, len_noise - len_sig) 155 | else: 156 | len_sig = np.asarray(section).shape[-1] 157 | ii = np.random.randint(0, len_noise - len_sig) 158 | return signal[..., ii:ii + len_sig] 159 | 160 | def ssn(self, x=None): 161 | """Returns the speech-shaped noise appropriate for the speech material. 162 | 163 | Parameters 164 | ---------- 165 | x : int or ndarray, optional 166 | If an integer is given, returns a speech-shaped noise of length 167 | `n` Alternatively, if a sentenced is given, the speech-shaped 168 | noise returned will be of the same length as the input signal. 169 | If `x` is `None`, the full SSN signal is returned. 170 | Returns 171 | ------- 172 | ndarray 173 | Speech-shaped noise signal. 174 | """ 175 | section = self.pick_section(self._ssn, x) 176 | if self.force_mono and section.ndim > 1: 177 | return section[0] 178 | return section 179 | 180 | def set_level(self, x, level): 181 | """Set level of a sentence, in dB. 182 | 183 | Parameters 184 | ---------- 185 | x : ndarray 186 | sentence 187 | level : float 188 | Level, in dB, at which the sentences are recorded. The reference 189 | is that and RMS of 1 corresponds to 0 dB SPL. 190 | 191 | Returns 192 | ------- 193 | array_like 194 | Adjusted sentences with a `level` db SPL with the reference 195 | that a signal with an RMS of 1 corresponds to 0 db SPL. 196 | """ 197 | return x * 10 ** ((level - self.ref_level) / 20) 198 | 199 | def average_level(self): 200 | """Calculate the average level across all sentences. 201 | 202 | The levels are calculated according to the toolbox's reference 203 | level. 204 | 205 | Returns 206 | ------- 207 | mean : float 208 | Mean level across all sentences, in dB SPL. 
209 | std : float 210 | Standard deviation of the levels across all sentences. 211 | 212 | See also 213 | -------- 214 | utils.dbspl 215 | """ 216 | spl = [utils.dbspl(x) for x in self.load_files()] 217 | return np.mean(spl), np.std(spl) 218 | 219 | def create_ssn(self, files=None, repetitions=200): 220 | """Creates a speech-shaped noise from the sentences. 221 | 222 | Creates a speech-shaped noise by randomly adding together sentences 223 | from the speech material. The output noise is 75% the length of all 224 | concatenated sentences. 225 | 226 | Parameters 227 | ---------- 228 | files : list, optional 229 | List of files to concatenate. Each file should be an `ndarray`. 230 | If `files` is None, all the files from the speech material 231 | will be used. They are loaded with the method `load_files()`. 232 | repetitions : int 233 | Number of times to superimpose the randomized sentences. The 234 | default is 120 times. 235 | 236 | Returns 237 | ------- 238 | ssn : ndarray 239 | 240 | Notes 241 | ----- 242 | Before each addition, the random stream of sentences is jittered to 243 | prevent perfect alignment of all sentences. The maximum jitter is 244 | equal to 25% of the length of the concatenated sentences. 245 | """ 246 | if files is None: 247 | files = [each for each in self.load_files()] 248 | ssn = np.hstack(files) 249 | n_output = int(0.75 * ssn.shape[-1]) 250 | max_jitter = ssn.shape[-1] - n_output 251 | ssn = ssn[..., :n_output] 252 | for _ in range(repetitions): 253 | random.shuffle(files) 254 | start = np.random.randint(max_jitter) 255 | ssn += np.hstack(files)[..., start:start + n_output] 256 | ssn /= np.sqrt(repetitions) 257 | return ssn 258 | 259 | def create_filtered_ssn(self, files=None, duration=5): 260 | """Create speech-shaped noise based on the average long-term spectrum 261 | of the speech material. 262 | 263 | Parameters 264 | ---------- 265 | files : list, optional 266 | List of files to concatenate. Each file should be an `ndarray`. 
267 | If `files` is None, all the files from the speech material 268 | will be used. They are loaded with the method `load_files()`. 269 | duration : float, optional 270 | Duration of the noise, in seconds. The default is 5 seconds. 271 | 272 | Returns 273 | ------- 274 | ssn : ndarray 275 | """ 276 | if files is None: 277 | files = tuple(self.load_files()) 278 | 279 | # Find maximum sentence length 280 | max_len = reduce(lambda x, y: max(x, y.shape[-1]), files, 0) 281 | n_fft = utils.next_pow_2(max_len) 282 | 283 | # Calculate the average spectra 284 | LONG_TERM_SPEC = reduce(lambda x, y: (x + np.fft.rfft(y, n_fft)) / 2, 285 | files, 286 | 0) 287 | 288 | average_masker = np.real(np.fft.irfft(LONG_TERM_SPEC, n=n_fft))[..., :max_len] 289 | 290 | n_noise = duration * self.fs 291 | if average_masker.ndim > 1: 292 | noise_shape = [average_masker.shape[0], n_noise] 293 | else: 294 | noise_shape = [n_noise] 295 | 296 | n_fft_noise = utils.next_pow_2(n_noise) 297 | NOISE = np.fft.rfft(np.random.randn(*noise_shape), n_fft_noise) 298 | NOISE /= np.abs(NOISE) 299 | NOISE *= np.abs(np.fft.rfft(average_masker, n_fft_noise)) 300 | ssn = np.real(np.fft.irfft(NOISE, n_fft_noise))[..., :n_noise] 301 | return ssn 302 | -------------------------------------------------------------------------------- /pambox/speech/mrsepsm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | 4 | import matplotlib.pyplot as plt 5 | from mpl_toolkits.axes_grid1 import ImageGrid 6 | import numpy as np 7 | from six.moves import zip 8 | 9 | from pambox.speech import Sepsm 10 | 11 | 12 | class MrSepsm(Sepsm): 13 | """Multi-resolution envelope power spectrum model (mr-sEPSM). 14 | 15 | Parameters 16 | ---------- 17 | fs : int, optional, (Default value = 22050) 18 | Sampling frequency. 19 | cf : array_like, optional 20 | Center frequency of the cochlear filters. 
21 | modf : array_like, optional (Default value = _default_modf) 22 | Center frequency of modulation filters. 23 | downsamp_factor : int, optional, (Default value = 10) 24 | Envelope downsampling factor. Simply used to make calculattion faster. 25 | noise_floor : float, optional, (Default value = 0.001) 26 | Value of the internal noise floor of the model. The default is -30 dB. 27 | snr_env_limit : float, optional, (Default value = 0.001) 28 | Lower limit of the SNRenv values. Default is -30 dB. 29 | snr_env_ceil : float, optional, (Default value = None) 30 | Upper limit of the SNRenv. No limit is applied if `None`. 31 | min_win : float, optional, (Default value = None) 32 | Minimal duration of the multi-resolution windows, in ms. 33 | name : string, optional, (Default value = 'MrSepsm') 34 | Name of the model. 35 | output_time_signals : bool, optional 36 | Output the time signals signals in the results dictionary. Adds the 37 | keys 'chan_sigs', 'chan_envs', and 'filtered_envs'. 38 | 39 | References 40 | ---------- 41 | .. [jorgensen2013multi] S. Joergensen, S. D. Ewert, and T. Dau: A 42 | multi-resolution envelope-power based model for speech 43 | intelligibility. J Acoust Soc Am 134 (2013) 436--446. 44 | 45 | """ 46 | 47 | # Default center frequencies of the modulation filterbank. 48 | _default_modf = (1., 2., 4., 8., 16., 32., 64., 128., 256.) 
    def _mr_env_powers(self, channel_envs, filtered_envs):
        """Calculates the envelope power in multi-resolution windows.

        Each modulation channel is segmented with a window of one period of
        its center frequency (optionally floored by `min_win`), and the
        envelope power is the window-wise variance normalized by the DC
        power of the channel envelope.

        Parameters
        ----------
        channel_envs : ndarray
            Envelope of the peripheral channels. The shape should be
            (N_SIG, N_CHAN, N).
        filtered_envs : ndarray
            Filtered envelope. The shape should be (N_SIG, N_CHAN, N_MODF, N)

        Returns
        -------
        mr_env_powers : masked_array
            Multi-resolution envelope powers of shape (N_SIG, N_CHAN, N_MODF,
            N_SEG), where N_SEG is the maximum number of segments in the
            multi-resolution process, which is for the highest modulation
            center frequency. For a 1 sec sample, it is about 400 segments.
            Low modulation frequencies come first.

        """
        # Here we find the duration and the number of windows for each
        # modulation center frequency...
        len_env = filtered_envs.shape[-1]
        win_durations = 1. / np.asarray(self.modf, dtype='float')
        if self.min_win is not None:
            # NOTE(review): the class docstring says `min_win` is in ms,
            # but `win_durations` are in seconds -- confirm the units.
            win_durations[win_durations < self.min_win] = self.min_win
        # Window lengths in (downsampled) samples.
        win_lengths = np.floor(win_durations * self.fs /
                               self.downsamp_factor).astype('int')
        n_segments = np.ceil(len_env / win_lengths).astype('int')

        # ... then we calculate the DC power used for the normalization.
        # We divide it by 2 such that a fully modulated signal has an
        # AC-power of 1...
        dc_power = np.mean(channel_envs, axis=-1) ** 2 / 2

        # ... then we create a masked array of zeros, where all entries are
        # hidden...
        mr_env_powers = np.ma.masked_all(
            (filtered_envs.shape[:-1] + (np.max(n_segments),))
        )

        # ... and start looping through the input signals...
        for i_sig, sig_envs in enumerate(filtered_envs):
            # ... and the channels...
            for i_chan, chan_envs in enumerate(sig_envs):
                # ... and the modulation channels, zipped with the window
                # lengths and number of segments...
                for i_modf, (n_seg, win_length, env) in enumerate(
                        zip(n_segments, win_lengths, chan_envs)):
                    # All segments but the last are guaranteed complete.
                    n_complete_seg = n_seg - 1
                    last_idx = int(n_complete_seg * win_length)
                    # ... we reshape to n_seg x win_length so that we can
                    # calculate the variance in a single operation...
                    tmp_env = env[:last_idx].reshape((-1, win_length))
                    # ... then we normalize the variance by N-1, like in MATLAB.
                    tmp_env_powers = np.var(tmp_env, axis=-1, ddof=1) / \
                        dc_power[i_sig, i_chan]
                    # ... and treat the last segment independently, just in
                    # case it is not complete, i.e. it is shorter than the
                    # window length.
                    tmp_env_powers_last = np.var(env[last_idx:], ddof=1) / \
                        dc_power[i_sig, i_chan]
                    # ... then we save it in our masked_array...
                    mr_env_powers[i_sig, i_chan, i_modf, :n_complete_seg] = \
                        tmp_env_powers
                    mr_env_powers[i_sig, i_chan, i_modf, n_complete_seg] = \
                        tmp_env_powers_last
                    # ... and finally make these values visible through the
                    # mask.
                    mr_env_powers.mask[i_sig, i_chan, i_modf, :n_seg] = False

        # Unmasked NaNs (e.g. from a zero-length last segment) are replaced
        # by zeros so downstream averaging stays finite.
        mr_env_powers[np.isnan(mr_env_powers) & ~mr_env_powers.mask] = 0
        return mr_env_powers
The clean signal will also be processed if it is 201 | available, but it is not used to make the prediction. 202 | 203 | Parameters 204 | ---------- 205 | clean : ndarray (optional) 206 | Clean speech signal, optional. 207 | mix : ndarray 208 | Mixture of the processed speech and noise. 209 | noise : ndarrays 210 | 211 | Returns 212 | ------- 213 | dict 214 | Dictionary with the predictions by the model. 215 | 216 | Notes 217 | ----- 218 | 219 | 220 | """ 221 | if clean is None: 222 | signals = np.vstack((mix, noise)) 223 | else: 224 | signals = np.vstack((clean, mix, noise)) 225 | 226 | bands_above_thres_idx = self._find_bands_above_thres(mix) 227 | channel_sigs = self._peripheral_filtering(signals) 228 | channel_envs = self._extract_env(channel_sigs) 229 | channel_envs = self._mod_sensitivity(channel_envs) 230 | filtered_envs, lt_exc_ptns = self._mod_filtering(channel_envs) 231 | mr_exc_ptns = self._mr_env_powers(channel_envs, filtered_envs) 232 | mr_snr_env_matrix, _ = self._mr_snr_env( 233 | mr_exc_ptns[-2], mr_exc_ptns[-1] 234 | ) 235 | time_av_snr_env_matrix = self._time_average(mr_snr_env_matrix) 236 | 237 | lt_snr_env_matrix, _ = super(MrSepsm, self)._snr_env(*lt_exc_ptns[-2:]) 238 | lt_snr_env = super(MrSepsm, self)._optimal_combination( 239 | lt_snr_env_matrix, bands_above_thres_idx 240 | ) 241 | 242 | snr_env = self._optimal_combination( 243 | time_av_snr_env_matrix, bands_above_thres_idx 244 | ) 245 | 246 | res = { 247 | 'p': { 248 | 'snr_env': snr_env, 249 | 'lt_snr_env': lt_snr_env, 250 | }, 251 | 'snr_env_matrix': time_av_snr_env_matrix, 252 | 253 | # Output of what is essentially the sEPSM. 
254 | 'lt_snr_env_matrix': lt_snr_env_matrix, 255 | 'lt_exc_ptns': lt_exc_ptns, 256 | 257 | 'mr_snr_env_matrix': mr_snr_env_matrix, 258 | 'mr_exc_ptns': mr_exc_ptns, 259 | 260 | 'bands_above_thres_idx': bands_above_thres_idx 261 | } 262 | if self.output_time_signals: 263 | res['chan_sigs'] = channel_sigs 264 | res['chan_envs'] = channel_envs 265 | res['filtered_envs'] = filtered_envs 266 | return res 267 | 268 | 269 | def _optimal_combination(self, snr_env, bands_above_thres_idx): 270 | """Combines SNRenv across audio and modulation channels. 271 | 272 | Only modulation channels below 1/4 of the audio center frequency are 273 | considered. 274 | 275 | Parameters 276 | ---------- 277 | snr_env : ndarray 278 | Linear values of SNRenv 279 | bands_above_thres_idx : ndarray 280 | Index of audio channels above threshold. 281 | 282 | Returns 283 | ------- 284 | float 285 | SNRenv value. 286 | 287 | """ 288 | 289 | # Calculate mask for modulation frequencies larger than 1/4 center 290 | # of their channel. 291 | ma = np.tile(np.asarray(self.modf), (len(self.cf), 1)) \ 292 | >= np.asarray(self.cf)[:, np.newaxis] / 4 293 | # Set the reject SNRenvs to 0. It's ok because we're just doing a 294 | # summation 295 | snr_env[ma] = 0. 296 | # Combine across modulation filters... 297 | snr_env = np.sqrt(np.sum(snr_env[bands_above_thres_idx] ** 2, 298 | axis=-1)) 299 | # ... and peripheral filters. 
300 | snr_env = np.sqrt(np.sum(snr_env ** 2)) 301 | return snr_env 302 | 303 | @staticmethod 304 | def _plot_mr_matrix(mat, x=None, y=None, fig=None, subplot_pos=111): 305 | """ 306 | 307 | Parameters 308 | ---------- 309 | mat : 310 | x : 311 | (Default value = None) 312 | y : array_like 313 | (Default value = None) 314 | x : arra_like 315 | (Default value = None) 316 | fig : 317 | (Default value = None) 318 | subplot_pos : 319 | (Default value = 111) 320 | 321 | Returns 322 | ------- 323 | 324 | """ 325 | 326 | n_y, n_x = mat.shape 327 | if y is None: 328 | y = np.arange(n_y) 329 | 330 | max_mat = mat.max() 331 | bmap = plt.get_cmap('PuBu') 332 | 333 | if fig is None: 334 | fig = plt.figure() 335 | else: 336 | pass 337 | 338 | grid = ImageGrid(fig, subplot_pos, 339 | nrows_ncols=(n_y, 1), 340 | aspect=False, 341 | share_all=False, 342 | cbar_mode='single', 343 | cbar_location='right', 344 | cbar_size='0.5%', 345 | cbar_pad=0.05) 346 | 347 | for ax, p, f in zip(grid, mat[::-1], y[::-1]): 348 | values = p.compressed() 349 | extent = (0, 1, 0, 1) 350 | im = ax.imshow(values[np.newaxis, :], 351 | aspect='auto', 352 | interpolation='none', 353 | extent=extent, 354 | vmax=max_mat, 355 | cmap=bmap) 356 | ax.grid(False) 357 | ax.set_yticks([0.5]) 358 | ax.set_yticklabels([f]) 359 | return im 360 | 361 | def plot_mr_exc_ptns(self 362 | , ptns 363 | , dur=None 364 | , db=True 365 | , vmin=None 366 | , vmax=None 367 | , fig_subplt=None 368 | , attr='exc_ptns' 369 | , add_cbar=True 370 | , add_ylabel=True 371 | , title=None 372 | ): 373 | """Plots multi-resolution representation of envelope powers. 374 | 375 | Parameters 376 | ---------- 377 | ptns : dict 378 | Predictions from the model. Must have a `mr_snr_env_matrix` 379 | key. 380 | dur : bool 381 | Display dB values of the modulation power or SNRenv values. (Default: True.) 382 | vmax : float 383 | Maximum value of the colormap. If `None`, 384 | the data's maxium value is used. 
(Default: None) 385 | db : bool 386 | Plot the values in dB. (Default value = True) 387 | vmin : float 388 | Minimum value of the heatmap. The value will be infered from the 389 | data if `None`. (Default value = None) 390 | fig_subplt : tuple of (fig, axes) 391 | Matplotlib `figure` and `axes` objects where the data should be 392 | plotted. If `None` is provided, a new figures with the necessary 393 | axes will be created. (Default value = None) 394 | attr : string 395 | Key to query in the `ptns` dictionary. (Default value = 'exc_ptns') 396 | add_cbar : bool 397 | Add a colorbar to the figure. (Default value = True) 398 | add_ylabel : bool 399 | Add a y-label to the axis. (Default value = True) 400 | title : bool 401 | Add a title to the axis. (Default value = None) 402 | 403 | Returns 404 | ------- 405 | 406 | """ 407 | 408 | mf = self.modf 409 | 410 | if 'exc_ptns' in attr: 411 | cbar_label = 'Modulation power' 412 | else: 413 | cbar_label = 'SNRenv' 414 | 415 | if db: 416 | ptns = 10 * np.log10(ptns) 417 | cbar_label += ' [dB]' 418 | else: 419 | ptns = ptns 420 | cbar_label += ' [lin]' 421 | 422 | if vmax is None: 423 | vmax = ptns.max() 424 | if vmin is None: 425 | vmin = ptns.min() 426 | 427 | n_mf, n_win = ptns.shape 428 | 429 | if dur is None: 430 | dur = n_win / self.modf[-1] 431 | 432 | if fig_subplt is None: 433 | fig = plt.figure() 434 | subplt = 111 435 | else: 436 | fig, subplt = fig_subplt 437 | 438 | if add_cbar: 439 | cbar_dict = { 440 | 'cbar_mode':'single', 441 | 'cbar_location':'right', 442 | 'cbar_size':'0.5%', 443 | 'cbar_pad':0.05} 444 | else: 445 | cbar_dict = {} 446 | 447 | bmap = plt.get_cmap('PuBu') 448 | xlabel = "Time [s]" 449 | ylabel = "Modulation frequency [Hz]" 450 | grid = ImageGrid(fig, subplt, 451 | nrows_ncols=(n_mf, 1), 452 | aspect=False, 453 | # axes_pad=0.05, 454 | # add_all=True, 455 | share_all=False, 456 | **cbar_dict) 457 | 458 | for ax, p, f in zip(grid, ptns[::-1], mf[::-1]): 459 | try: 460 | values = p.compressed() 
class SII(object):
    """Speech intelligibility index model, [ansi1997sii]_.

    Implements the one-third octave band procedure of the standard.

    Parameters
    ----------
    T : float or array_like, optional, (Default is 0)
        Hearing threshold. 18 values in dB HL or a single value.
    I : int, optional, (Default is 0, normal speech)
        Band importance function selector. See Notes section below.

    Notes
    -----
    Arguments for 'I':

    A scalar having a value of either 0, 1, 2, 3, 4, 5, or 6. The
    Band-importance functions associated with each scalar are:

    * 0: Average speech as specified in Table 3 (DEFAULT)
    * 1: various nonsense syllable tests where most English phonemes occur
      equally often (as specified in Table B.2)
    * 2: CID-22 (as specified in Table B.2)
    * 3: NU6 (as specified in Table B.2)
    * 4: Diagnostic Rhyme test (as specified in Table B.2)
    * 5: short passages of easy reading material (as specified in
      Table B.2)
    * 6: SPIN (as specified in Table B.2)

    References
    ----------
    .. [ansi1997sii] American National Standards Institute: American
        National Standard methods for calculation of the Speech
        Intelligibility Index (1997).
    """

    def __init__(self, T=0., I=0):
        T = np.atleast_1d(T)

        # A scalar threshold applies to all 18 one-third octave bands.
        if len(T) == 1:
            T = np.ones(18) * T
        if len(T) != 18:
            raise ValueError("The length of T should be 18.")
        if I not in list(range(7)):
            raise ValueError("Band importance should be an integer "
                             "between 0 and 6 inclusive.")
        self.T = asarray(T)
        self.I = int(I)

        # Band center frequencies for 1/3rd octave procedure (Table 3)
        self.f = asarray([160, 200, 250, 315, 400, 500, 630, 800, 1000, 1250,
                          1600, 2000, 2500, 3150, 4000, 5000, 6300, 8000])

        # Define Internal Noise Spectrum Level (Table 3)
        self.X = asarray([0.6, -1.7, -3.9, -6.1, -8.2, -9.7, -10.8, -11.9,
                          -12.5, -13.5, -15.4, -17.7, -21.2, -24.2, -25.9,
                          -23.6, -15.8, -7.1])

        # Equivalent Internal Noise Spectrum Level (4.4 Eq. 10)
        self.X = self.X + self.T

        # Band importance functions, one column per selector `I`
        # (Table 3 and Table B.2), 18 bands per column.
        self.BIArr = asarray(
            [0.0083, 0, 0.0365, 0.0168, 0, 0.0114, 0,
             0.0095, 0, 0.0279, 0.013, 0.024, 0.0153, 0.0255,
             0.015, 0.0153, 0.0405, 0.0211, 0.033, 0.0179, 0.0256,
             0.0289, 0.0284, 0.05, 0.0344, 0.039, 0.0558, 0.036,
             0.044, 0.0363, 0.053, 0.0517, 0.0571, 0.0898, 0.0362,
             0.0578, 0.0422, 0.0518, 0.0737, 0.0691, 0.0944, 0.0514,
             0.0653, 0.0509, 0.0514, 0.0658, 0.0781, 0.0709, 0.0616,
             0.0711, 0.0584, 0.0575, 0.0644, 0.0751, 0.066, 0.077,
             0.0818, 0.0667, 0.0717, 0.0664, 0.0781, 0.0628, 0.0718,
             0.0844, 0.0774, 0.0873, 0.0802, 0.0811, 0.0672, 0.0718,
             0.0882, 0.0893, 0.0902, 0.0987, 0.0961, 0.0747, 0.1075,
             0.0898, 0.1104, 0.0938, 0.1171, 0.0901, 0.0755, 0.0921,
             0.0868, 0.112, 0.0928, 0.0932, 0.0781, 0.082, 0.1026,
             0.0844, 0.0981, 0.0678, 0.0783, 0.0691, 0.0808, 0.0922,
             0.0771, 0.0867, 0.0498, 0.0562, 0.048, 0.0483, 0.0719,
             0.0527, 0.0728, 0.0312, 0.0337, 0.033, 0.0453, 0.0461,
             0.0364, 0.0551, 0.0215, 0.0177, 0.027, 0.0274, 0.0306,
             0.0185, 0, 0.0253, 0.0176, 0.024, 0.0145, 0])\
            .reshape(-1, 7)

        # Standard speech spectrum levels (Table 3), one column per vocal
        # effort: normal, raised, loud, shout.
        self.Ei = asarray([32.41, 33.81, 35.29, 30.77,
                           34.48, 33.92, 37.76, 36.65,
                           34.75, 38.98, 41.55, 42.5,
                           33.98, 38.57, 43.78, 46.51,
                           34.59, 39.11, 43.3, 47.4,
                           34.27, 40.15, 44.85, 49.24,
                           32.06, 38.78, 45.55, 51.21,
                           28.3, 36.37, 44.05, 51.44,
                           25.01, 33.86, 42.16, 51.31,
                           23, 31.89, 40.53, 49.63,
                           20.15, 28.58, 37.7, 47.65,
                           17.32, 25.32, 34.39, 44.32,
                           13.18, 22.35, 30.98, 40.8,
                           11.55, 20.15, 28.21, 38.13,
                           9.33, 16.78, 25.41, 34.41,
                           5.31, 11.47, 18.35, 28.24,
                           2.59, 7.67, 13.87, 23.45,
                           1.13, 5.07, 11.39, 20.72]).reshape(-1, 4)

    def _band_importance(self, test):
        """Get values of the band importance function.

        Parameters
        ----------
        test : int
            Number of the band importance function to select (0-6).

        Returns
        -------
        ndarray
            Band importance function, 18 values.

        Raises
        ------
        ValueError
            If `test` is not an integer between 0 and 6.
        """
        if test not in list(range(7)):
            raise ValueError("Band Importance function must be integer "
                             "between 0 and 6.")
        return self.BIArr[:, test].T

    def _speech_spectrum(self, vcl_effort):
        """Returns the standard speech spectrum from Table 3.

        The spectrum depends on the vocal effort, possible values are
        'normal', 'raised', 'loud', 'shout'.

        Parameters
        ----------
        vcl_effort : string
            Vocal effort.

        Returns
        -------
        ndarray
            Speech spectrum levels, 18 values in dB SPL.

        Raises
        ------
        ValueError
            If the vocal effort string is not recognized.
        """
        efforts = {'normal': 0, 'raised': 1, 'loud': 2, 'shout': 3}
        if vcl_effort not in efforts:
            raise ValueError("Vocal error string not recognized.")
        return self.Ei[:, efforts[vcl_effort]]

    def predict_spec(self, clean=None, mix=None, noise=-50):
        """Predicts intelligibility based on the spectra levels of the speech
        and the noise.

        Parameters
        ----------
        clean : array_like or float
            Speech spectrum level, in dB SPL, either a single value applied
            to all 18 bands or 18 values. Equivalent to "E" in
            [ansi1997sii]_.
        mix : ignored
            This argument is present only to conform to the API.
        noise : array_like or float, optional, (Default is -50 dB SPL)
            Noise spectrum level in dB SPL. Equivalent to N in
            [ansi1997sii]_.

        Returns
        -------
        res : dict
            Dictionary with prediction values under res['p']['sii'].
        """
        # Bug fix: work on float copies. The original wrote the NaN
        # replacements back into the caller's arrays, modifying them in
        # place (and failing on integer arrays).
        if isinstance(clean, Number):
            clean = clean * ones(18)
        else:
            clean = np.array(clean, dtype=float)
        if isinstance(noise, Number):
            noise = noise * ones(18)
        else:
            noise = np.array(noise, dtype=float)

        clean[np.isnan(clean)] = 0
        noise[np.isnan(noise)] = 0

        # Self-Speech Masking Spectrum (4.3.2.1 Eq. 5)
        V = clean - 24.

        # Larger of the noise and self-masking spectra (4.3.2.2)
        B = np.fmax(V, noise)

        # Calculate slope parameter Ci (4.3.2.3 Eq. 7)
        C = 0.6 * (B + 10. * log10(self.f) - 6.353) - 80.

        # Initialize Equivalent Masking Spectrum Level (4.3.2.4)
        Z = zeros(18)
        Z[0] = B[0]

        # Calculate Equivalent Masking Spectrum Level (4.3.2.5 Eq. 9)
        for i in range(1, 18):
            Z[i] = 10. * log10(10 ** (0.1 * noise[i])
                               + sum(10. ** (0.1 * (B[0:i] + 3.32
                                     * C[0:i] * log10(0.89 * self.f[i]
                                                      / self.f[:i])))))
        # Disturbance Spectrum Level (4.5)
        D = np.fmax(Z, self.X)

        # Level Distortion Factor (4.6 Eq. 11)
        L = 1. - (clean - self._speech_spectrum('normal') - 10.) / 160.
        L = np.fmin(1., L)

        # Temporary band audibility (4.7.1 Eq. 12)
        K = (clean - D + 15.) / 30.
        K = np.fmin(1., np.fmax(0., K))

        # Band Audibility Function (4.7.2 Eq. 13)
        A = L * K

        # Speech Intelligibility Index (4.8 Eq. 14)
        out = sum(self._band_importance(self.I) * A)
        res = {
            'p': {
                'sii': np.fmax(out, 0)
            }
        }
        return res
https://raw.githubusercontent.com/achabotl/pambox/be1af869538e9e1781f990a9036856cb1153b58d/pambox/tests/data/test_hilbert_env_and_lp_filtering_v1.mat -------------------------------------------------------------------------------- /pambox/tests/data/test_mr_sepsm_full.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/achabotl/pambox/be1af869538e9e1781f990a9036856cb1153b58d/pambox/tests/data/test_mr_sepsm_full.mat -------------------------------------------------------------------------------- /pambox/tests/data/test_mr_sepsm_mr_env_powers.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/achabotl/pambox/be1af869538e9e1781f990a9036856cb1153b58d/pambox/tests/data/test_mr_sepsm_mr_env_powers.mat -------------------------------------------------------------------------------- /pambox/tests/data/test_mr_sepsm_mr_snr_env_mix.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/achabotl/pambox/be1af869538e9e1781f990a9036856cb1153b58d/pambox/tests/data/test_mr_sepsm_mr_snr_env_mix.mat -------------------------------------------------------------------------------- /pambox/tests/data/test_mr_sepsm_mr_snr_env_noise.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/achabotl/pambox/be1af869538e9e1781f990a9036856cb1153b58d/pambox/tests/data/test_mr_sepsm_mr_snr_env_noise.mat -------------------------------------------------------------------------------- /pambox/tests/data/test_mr_sepsm_snrenv_mr_v1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/achabotl/pambox/be1af869538e9e1781f990a9036856cb1153b58d/pambox/tests/data/test_mr_sepsm_snrenv_mr_v1.mat -------------------------------------------------------------------------------- 
/pambox/tests/data/test_mr_sepsm_time_average_snr.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/achabotl/pambox/be1af869538e9e1781f990a9036856cb1153b58d/pambox/tests/data/test_mr_sepsm_time_average_snr.mat -------------------------------------------------------------------------------- /pambox/tests/data/test_sepsm_spec_sub_0dB_kappa_0_8.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/achabotl/pambox/be1af869538e9e1781f990a9036856cb1153b58d/pambox/tests/data/test_sepsm_spec_sub_0dB_kappa_0_8.mat -------------------------------------------------------------------------------- /pambox/tests/data/test_sii.csv: -------------------------------------------------------------------------------- 1 | E,N,T,I,SII 2 | 40,0,0,0,0.93393 3 | 40,0,0,1,0.92471 4 | 40,0,0,2,0.94328 5 | 40,0,0,3,0.94088 6 | 40,0,0,4,0.94373 7 | 40,0,0,5,0.94534 8 | 40,0,0,6,0.93672 9 | 40,50,0,0,0.12247 10 | 40,50,0,1,0.12041 11 | 40,50,0,2,0.12513 12 | 40,50,0,3,0.12398 13 | 40,50,0,4,0.12408 14 | 40,50,0,5,0.12479 15 | 40,50,0,6,0.1228 16 | 40,50,60,0,0.11265 17 | 40,50,60,1,0.11439 18 | 40,50,60,2,0.102 19 | 40,50,60,3,0.1107 20 | 40,50,60,4,0.10959 21 | 40,50,60,5,0.10935 22 | 40,50,60,6,0.11119 23 | -------------------------------------------------------------------------------- /pambox/tests/test_bsepsm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import os 4 | 5 | import numpy as np 6 | from numpy import asarray 7 | from numpy.testing import assert_array_equal 8 | import pytest 9 | 10 | from pambox.speech import BsEPSM 11 | 12 | 13 | __DATA_ROOT__ = os.path.join(os.path.dirname(__file__), 'data') 14 | 15 | 16 | class TestBinauralMrSepsmReturnValues(object): 17 | 18 | @classmethod 19 | def setup_class(cls): 20 | 
cls.b = BsEPSM() 21 | 22 | @pytest.mark.parametrize("left, right, left_idx, right_idx, target", [ 23 | # Take best of each side 24 | (asarray((0, 1, 0, 1)), asarray((0, 0, 1, 1)), 25 | range(4), range(4), (0, 1, 1, 1)), 26 | # Take left only 27 | (asarray((0, 1, 0, 1)), asarray((0, 0, 1, 1)), 28 | np.zeros(4, dtype='int'), range(4), (0, 0, 1, 1)), 29 | # Take right only 30 | (asarray((0, 1, 0, 1)), asarray((0, 0, 1, 1)), 31 | range(4), np.zeros(4, dtype='int'), (0, 1, 0, 1)), 32 | # Pick from both sides 33 | (asarray((0, 1, 0, 5)), asarray((0, 0, 3, 4)), 34 | (0, 1), (2, 3), (0, 1, 3, 4)), 35 | 36 | ]) 37 | def test_better_ear(self, left, right, left_idx, right_idx, target): 38 | be = self.b._better_ear(left, right, left_idx, right_idx) 39 | assert_array_equal(be, target) 40 | 41 | @pytest.mark.parametrize("left, right, target", [ 42 | ({'bands_above_thres_idx': (0, 1)}, 43 | {'bands_above_thres_idx': (2, 3)}, 44 | ()), 45 | ({'bands_above_thres_idx': (2, 3)}, 46 | {'bands_above_thres_idx': (2, 3)}, 47 | (2, 3)), 48 | ({'bands_above_thres_idx': (0, 1)}, 49 | {'bands_above_thres_idx': (1, 3)}, 50 | 1), 51 | ]) 52 | def test_calculate_bu_bands_above_threshold(self, left, right, target): 53 | b = BsEPSM(cf=range(4)) 54 | bu_mask = b._calc_bu_bands_above_thres(left, right) 55 | assert_array_equal(bu_mask, target) 56 | 57 | @pytest.mark.parametrize("left, right, target", [ 58 | ({'bands_above_thres_idx': (1, 2)}, 59 | {'bands_above_thres_idx': (2, 3)}, 60 | (1, 2, 3)), 61 | ]) 62 | def test_calculate_be_bands_above_threshold(self, left, right, target): 63 | b = BsEPSM(cf=range(4)) 64 | be_mask = b._calc_be_bands_above_thres(left, right) 65 | assert_array_equal(be_mask, target) 66 | 67 | @pytest.mark.parametrize("left, right, target", [ 68 | # Take best of each side 69 | ({'mr_snr_env_matrix': asarray((0, 1, 0, 1)), 'bands_above_thres_idx': 70 | range(4)}, 71 | {'mr_snr_env_matrix': asarray((0, 0, 1, 1)), 'bands_above_thres_idx': 72 | range(4)}, 73 | (0, 1, 1, 1)), 74 | 
# Take left only 75 | ({'mr_snr_env_matrix': asarray((0, 1, 0, 1)), 'bands_above_thres_idx': 76 | np.zeros(4, dtype='int')}, 77 | {'mr_snr_env_matrix': asarray((0, 0, 1, 1)), 'bands_above_thres_idx': 78 | range(4)}, 79 | (0, 0, 1, 1)), 80 | # Take right only 81 | ({'mr_snr_env_matrix': asarray((0, 1, 0, 1)), 'bands_above_thres_idx': 82 | range(4)}, 83 | {'mr_snr_env_matrix': asarray((0, 0, 1, 1)), 'bands_above_thres_idx': 84 | np.zeros(4, dtype='int')}, 85 | (0, 1, 0, 1)), 86 | # Pick from both sides 87 | ({'mr_snr_env_matrix': asarray((0, 1, 0, 5)), 'bands_above_thres_idx': 88 | (0, 1)}, 89 | {'mr_snr_env_matrix': asarray((0, 0, 3, 4)), 'bands_above_thres_idx': 90 | (2, 3)}, 91 | (0, 1, 3, 4)), 92 | ]) 93 | def test_apply_be_process(self, left, right, target): 94 | out = self.b._apply_be_process(left, right) 95 | assert_array_equal(out, target) 96 | 97 | 98 | -------------------------------------------------------------------------------- /pambox/tests/test_central.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import os.path 4 | 5 | import numpy as np 6 | from numpy.testing import assert_allclose 7 | import pytest 8 | 9 | from pambox import central 10 | 11 | 12 | __DATA_ROOT__ = os.path.join(os.path.dirname(__file__), 'data') 13 | 14 | 15 | @pytest.fixture 16 | def data(): 17 | return np.asarray([0.28032187, 1.07108181, 3.35513227, 8.66774961, 18 | 18.61914334, 33.63172026, 51.87228063, 69.72236134, 19 | 83.79127082, 92.72205919, 97.28779782, 99.16754416]) 20 | 21 | 22 | @pytest.fixture 23 | def central_parameters(): 24 | return 3.74647303e+00, 5.15928999e-02, -9.09197905e-07, 8000. 25 | 26 | 27 | @pytest.fixture 28 | def snr(): 29 | return np.arange(-9, 3, 1) 30 | 31 | 32 | @pytest.fixture 33 | def snrenv(snr): 34 | return 10. 
** np.linspace(-2, 2, len(snr)) 35 | 36 | 37 | def test_fit_obs(data, snrenv, central_parameters): 38 | c = central.IdealObs() 39 | c.fit_obs(snrenv, data) 40 | params = c.get_params() 41 | res = [params['k'], params['q'], params['sigma_s']] 42 | np.testing.assert_allclose(res, central_parameters[0:3], atol=1e-5) 43 | 44 | 45 | def test_snr_env_to_pc(snrenv, central_parameters, data): 46 | c = central.IdealObs(k=0.81, q=0.5, sigma_s=0.6, m=8000.) 47 | snrenvs = np.asarray([2.6649636, 6.13623543, 13.1771355, 24.11754981, 48 | 38.35865445, 55.59566425]) 49 | pc = c.transform(snrenvs) 50 | target = np.asarray([1.62223958e-02, 4.52538073e-01, 1.02766152e+01, 51 | 5.89991555e+01, 9.57537063e+01, 9.99301187e+01]) 52 | np.testing.assert_allclose(pc, target, atol=1e-4) 53 | 54 | 55 | def test_get_params(): 56 | p = {'k': 1, 'q': 2, 'sigma_s': 0.5, 'm': 800} 57 | c = central.IdealObs(**p) 58 | assert p == c.get_params() 59 | 60 | 61 | def test_fit_obs_set_m_and_sigma_s(data, snrenv): 62 | c = central.IdealObs() 63 | 64 | tests = (((1.42765076, 0.390529, 0.6, 12), (0.6, 12)), 65 | ((3.6590, 0.10341, 0.6, 8000), (0.6, 8000)), 66 | ((3.7464, 0.05159, -1.2144e-4, 8000), (None, 8000))) 67 | 68 | for target, values in tests: 69 | sigma_s, m = values 70 | c.fit_obs(snrenv, data, sigma_s=sigma_s, m=m) 71 | params = c.get_params() 72 | res = [params['k'], params['q'], params['sigma_s'], params['m']] 73 | np.testing.assert_allclose(res, target, atol=1e-4) 74 | 75 | 76 | def test_mod_filtering_for_simple_signal(): 77 | signal = np.asarray([1, 0, 1, 0, 1]) 78 | fs = 2205 79 | modf = np.asarray([1., 2., 4., 8., 16., 32., 64.]) 80 | mfb = central.EPSMModulationFilterbank(fs, modf) 81 | p, _ = mfb.filter(signal) 82 | target = np.asarray([6.69785298e-18, 6.06375859e-06, 2.42555385e-05, 83 | 9.70302212e-05, 3.88249957e-04, 1.55506496e-03, 84 | 6.25329663e-03]) 85 | assert_allclose(p, target, rtol=1e-2) 86 | -------------------------------------------------------------------------------- 
/pambox/tests/test_ec.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | from numpy.testing import assert_allclose, assert_array_equal 5 | import pytest 6 | 7 | from pambox.central import EC 8 | 9 | 10 | class TestECMethodsInputs: 11 | 12 | @classmethod 13 | def setup_class(cls): 14 | cls.ec = EC(22050) 15 | 16 | def test_equalize_different_shapes(self): 17 | with pytest.raises(ValueError): 18 | self.ec.equalize((1, 2, 3), ((1, 2, 3), (4, 5, 6)), 0) 19 | 20 | def test_equalize_more_than_2d(self): 21 | with pytest.raises(ValueError): 22 | self.ec.equalize(np.ones((1, 2, 3)), (0,), 0) 23 | 24 | def test_equalize_with_2d_inputs_but_only_one_integer_cf(self): 25 | with pytest.raises(ValueError): 26 | self.ec.equalize(np.ones((2, 2)), np.ones((2, 2)), 1) 27 | 28 | def test_equalize_with_2d_inputs_but_only_one_iterable_cf(self): 29 | with pytest.raises(ValueError): 30 | self.ec.equalize(np.ones((2, 2)), np.ones((2, 2)), (1,)) 31 | 32 | def test_cancel_different_shapes(self): 33 | with pytest.raises(ValueError): 34 | self.ec.cancel((1, 2, 3), ((1, 2, 3), (4, 5, 6)), 1, 0) 35 | 36 | def test_cancel_more_than_2d(self): 37 | with pytest.raises(ValueError): 38 | self.ec.cancel(np.ones((1, 2, 3)), (0,), 1, 0) 39 | 40 | 41 | class TestECReturnValues: 42 | 43 | @classmethod 44 | def setup_class(cls): 45 | cls.ec = EC(22050) 46 | 47 | @pytest.mark.parametrize("x, delay, target", [ 48 | ((1, 0, 0), 1, (0, 1, 0)), 49 | ((1, 0, 0), -1, (0, 0, 1)), 50 | ]) 51 | def test_shift(self, x, delay, target): 52 | x = np.asarray(x) 53 | delay = delay / self.ec.fs 54 | out = self.ec._shift(x, delay) 55 | assert_allclose(out, target, atol=1e-15) 56 | 57 | @pytest.mark.parametrize("left, right, cf, target", [ 58 | ((1, 0, 0), (1, 0, 0), 100, 0), 59 | ((2, 1, 0, 0), (0, 2, 1, 0), 1000, -1), 60 | ((0, 2, 1, 0), (2, 1, 0, 0), 1000, 1), 61 | ((1, 0, 0, 0), (0, 1, 0, 
0), 1000, -1), 62 | ((0, 0, 0, 0), (0, 0, 0, 0), 1000, 0), 63 | ]) 64 | def test_find_tau(self, left, right, cf, target): 65 | left = np.asarray(left) 66 | right = np.asarray(right) 67 | tau = self.ec._find_tau(left, right, cf) 68 | assert tau == target / self.ec.fs 69 | 70 | def test_ec_cancel_2d_input_with_single_window(self): 71 | ec = EC(22050, win_len=None) 72 | out = ec.cancel([(2, 2), (2, 2)], [(1, 1,), (1, 1)], (1, 2), (0, 0)) 73 | assert_allclose(out, ((0.5, 0.5), (0, 0)), atol=1e-15) 74 | 75 | def test_ec_cancel_1d_input_with_single_window(self): 76 | ec = EC(22050, win_len=None) 77 | out = ec.cancel((2, 2), (1, 1,), 1, 0) 78 | assert_allclose(out, (0.5, 0.5)) 79 | 80 | def test_single_channel_cancel_with_tau_equals_zero(self): 81 | ec = EC(4, win_len=None) 82 | out = ec._single_chan_cancel(np.array((1., 1.)), np.array((1., 1.)), 1, 0) 83 | assert_array_equal(out, (0, 0)) 84 | 85 | def test_single_channel_cancel_with_tau_equals_one(self): 86 | ec = EC(4, win_len=None) 87 | out = ec._single_chan_cancel(np.array((0., 1.)), np.array((1., 0.)), 1, -0.25) 88 | assert_allclose(out, (0., 0.), atol=1e-16) 89 | 90 | def test_single_channel_cancel_with_overlap(self): 91 | ec = EC(4, win_len=0.5, overlap=0.5) 92 | out = ec._single_chan_cancel(np.ones(4), np.ones(4), 93 | (1, 1, 1), (0, 0, 0)) 94 | assert_array_equal(out, (0, 0, 0, 0)) 95 | 96 | def test_single_channel_cancel_with_without_overlap(self): 97 | ec = EC(4, win_len=0.5, overlap=0) 98 | out = ec._single_chan_cancel(np.ones(4), np.ones(4), (1, 1), (1, 1)) 99 | assert_array_equal(out, (0, 0, 0, 0)) 100 | 101 | @pytest.mark.parametrize("fs, left, right, win_len, overlap, " 102 | "target_shape", [ 103 | (4, (0, 0, 0, 0), (0, 0, 0, 0), 0.5, 0, 2), 104 | (4, (0, 0, 0, 0), (0, 0, 0, 0), 0.5, 0.5, 3), 105 | (4, (0, 0, 0, 0), (0, 0, 0, 0), 0.25, None, 4), 106 | (4, (0, 0, 0, 0), (0, 0, 0, 0), None, None, 1), 107 | (4, (0, 0, 0, 0), (0, 0, 0, 0), None, 0.5, 1), 108 | ]) 109 | def test_number_of_alpha_and_tau(self, 
fs, left, right, win_len, overlap, 110 | target_shape): 111 | ec = EC(fs, win_len=win_len, overlap=overlap) 112 | alphas, taus = ec.equalize(left, right, 100) 113 | assert len(alphas) == target_shape 114 | 115 | def test_find_tau_returns_zero_if_outwise_of_allowed_range(self): 116 | ec = EC(10, win_len=0.5, overlap=0) 117 | tau = ec._find_tau(np.arange(10), np.arange(10), 63) 118 | assert_array_equal(tau, 0) 119 | 120 | def test_equalization_of_2d_signal(self): 121 | ec = EC(4, win_len=1, overlap=None) 122 | alphas, taus = ec.equalize([(1, 0, 0, 0), (1, 0, 0, 0)], 123 | [(1, 0, 0, 0), (1, 0, 0, 0)], 124 | (100, 200)) 125 | assert_array_equal(alphas, ((1,), (1,))) 126 | assert_array_equal(taus, ((0, ), (0, ))) 127 | 128 | @pytest.mark.parametrize('x', [ 129 | np.array((1, 1, 1)), 130 | np.array(((1, 1, 1), 131 | (1, 1, 1))), 132 | ]) 133 | def test_return_shape_when_creating_jitter(self, x): 134 | input_shape = x.shape 135 | alphas, deltas = self.ec.create_jitter(x) 136 | assert alphas.shape == input_shape and deltas.shape == input_shape 137 | 138 | @pytest.mark.parametrize("x, alphas, deltas, target", [ 139 | (np.arange(4), np.zeros(4), np.zeros(4), np.arange(4)), 140 | (np.arange(4), 0.5 * np.ones(4), np.zeros(4), (0, 0.5, 1, 1.5)), 141 | (np.arange(4), np.zeros(4), np.ones(4), (1, 2, 3, 3)), 142 | (np.arange(4), np.zeros(4), -np.ones(4), (0, 0, 1, 2)), 143 | (np.arange(6).reshape((2, 3)), np.zeros((2, 3)), 144 | np.array(((1, 1, 1), (-1, -1, -1))), ((1, 2, 2), (3, 3, 4))), 145 | ]) 146 | def test_return_shape_when_applying_jitter(self, x, alphas, deltas, target): 147 | out = self.ec.apply_jitter(x, alphas, deltas) 148 | assert_array_equal(out, target) 149 | 150 | def test_setting_out_value_to_input_when_applying_jitter(self): 151 | x = np.arange(4, dtype='float') 152 | alphas = np.array((1, 1, 1, 1)) 153 | deltas = np.zeros(4, dtype='int') 154 | out = self.ec.apply_jitter(x, alphas, deltas, out=x) 155 | assert_array_equal(out, x) 156 | 157 | def 
test_out_value_is_different_from_input_when_applying_jitter(self): 158 | x = np.arange(4, dtype='float') 159 | alphas = np.array((1, 1, 1, 1)) 160 | deltas = np.zeros(4, dtype='int') 161 | out = self.ec.apply_jitter(x, alphas, deltas) 162 | with pytest.raises(AssertionError): 163 | assert_array_equal(out, x) 164 | 165 | @pytest.mark.parametrize("x, target", [ 166 | ((1, 1, 1), (0.7553155, 0.7553155, 0.55898691)), 167 | (((1, 1, 1), (1, 1, 1)), ((0.7553155, 0.8999607, 0.7553155), 168 | (0.5331105, 0.5331105, 1.24431947))), 169 | ]) 170 | def test_jitter_of_signal(self, x, target): 171 | np.random.seed(0) 172 | out = self.ec.jitter(x) 173 | assert_allclose(out, target, atol=1e-5) 174 | 175 | @pytest.mark.parametrize("n_samples, win_len, step, target", [ 176 | (7, 2, 1, 6), 177 | (7, 4, 2, 2), 178 | (7, 2, 2, 3), 179 | (7, 4, 4, 1), 180 | (7, 4, 1, 4), 181 | (7, 3, 1, 5), 182 | ]) 183 | def test_number_of_valid_windows(self, n_samples, win_len, step, target): 184 | n = self.ec._n_valid_windows(n_samples, win_len, step) 185 | assert n == target 186 | -------------------------------------------------------------------------------- /pambox/tests/test_experiment.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import os.path 4 | 5 | import numpy as np 6 | from numpy.testing import assert_allclose 7 | import pytest 8 | 9 | from pambox.speech import Experiment 10 | 11 | 12 | __DATA_ROOT__ = os.path.join(os.path.dirname(__file__), 'data') 13 | 14 | class TestExperiment(object): 15 | @pytest.mark.parametrize("fixed_target, target, masker, snr, exp_target, " 16 | "exp_masker", ( 17 | (True, [0, 1], [0, 1], 0, [0, 2514.86685937], [0, 2514.86685937]), 18 | (True, [0, 1], [0, 1], 5, [0, 2514.86685937], [0, 1414.21356237]), 19 | (False, [0, 1], [0, 1], 0, [0, 2514.86685937], [0, 2514.86685937]), 20 | (False, [0, 1], [0, 1], 5, [0, 4472.135955], 
[0, 2514.86685937]), 21 | )) 22 | def test_adjust_levels(self, fixed_target, 23 | target, masker, snr, 24 | exp_target, exp_masker): 25 | exp = Experiment([], [], [], fixed_target=fixed_target, fixed_level=65) 26 | target, masker = exp.adjust_levels(target, masker, snr) 27 | assert_allclose(target, exp_target, atol=1e-6) 28 | assert_allclose(masker, exp_masker, atol=1e-6) 29 | 30 | @classmethod 31 | def distort_passthrough(target, masker, *args, **kwargs): 32 | return target, masker 33 | 34 | def test_preprocessing(self): 35 | params = {} 36 | target = np.asarray([0, 1]) 37 | masker = np.asarray([0, 1]) 38 | snr = 0 39 | 40 | exp_target = [0, 1] 41 | exp_mix = [0, 2] 42 | exp_masker = [0, 1] 43 | 44 | exp = Experiment([], [], [], distortion=self.distort_passthrough, 45 | dist_params=params, adjust_levels_bef_proc=True, 46 | fixed_level=-3.0102999566398125) 47 | target, mix, masker = exp.preprocessing(target, masker, snr, params) 48 | assert_allclose(target, exp_target) 49 | assert_allclose(mix, exp_mix) 50 | assert_allclose(masker, exp_masker) 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /pambox/tests/test_inner.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import csv 4 | import os.path 5 | 6 | import numpy as np 7 | import scipy.io as sio 8 | from numpy.testing import assert_allclose 9 | import pytest 10 | 11 | from pambox import inner 12 | 13 | 14 | __DATA_ROOT__ = os.path.join(os.path.dirname(__file__), 'data') 15 | 16 | 17 | def test_lowpass_filtering_of_envelope(): 18 | mat = sio.loadmat(__DATA_ROOT__ + "/test_hilbert_env_and_lp_filtering_v1.mat", 19 | squeeze_me=True) 20 | envelope = mat['unfiltered_env'] 21 | target = mat['lp_filtered_env'] 22 | filtered_envelope = inner.lowpass_env_filtering(envelope, 150., 1, 22050.) 
23 | assert_allclose(filtered_envelope, target, atol=1e-7) 24 | 25 | 26 | def test_erb(): 27 | bw = inner.erb_bandwidth(1000) 28 | assert_allclose(bw, 132.63, rtol=1e-4) 29 | 30 | 31 | def test_GammatoneFilterbank_filtering(): 32 | from itertools import product 33 | mat = sio.loadmat(__DATA_ROOT__ + '/test_GammatoneFilterbank_filtering.mat', 34 | squeeze_me=True) 35 | cf = [63, 1000] 36 | fs = [22050, 44100] 37 | for c, f in product(cf, fs): 38 | g = inner.GammatoneFilterbank(f, c) 39 | y = g.filter(mat['x']) 40 | target_file = 'y_%d_%d' % (f, c) 41 | np.testing.assert_allclose(y[0], mat[target_file]) 42 | 43 | 44 | def test_third_octave_filtering_of_noise_(): 45 | with open(os.path.join(__DATA_ROOT__, 46 | 'test_third_octave_filtering_of_noise.csv')) as \ 47 | csv_file: 48 | pass 49 | data_file = csv.reader(csv_file) 50 | temp = next(data_file) 51 | n_samples = int(temp[0]) 52 | x = np.empty(n_samples) 53 | 54 | for i, s in enumerate(data_file): 55 | x[i] = np.asarray(s, dtype=np.float) 56 | 57 | target = np.array([ 151.66437785, 688.6881118 ]) 58 | center_f = [63, 125] 59 | fs = 22050 60 | filterbank = inner.RectangularFilterbank(fs, center_f, width=3) 61 | rms_out = filterbank.filter(x) 62 | assert_allclose(rms_out, target, rtol=1e-4) 63 | 64 | 65 | @pytest.mark.parametrize("x, target", [ 66 | ([0, 1, 2, 1, 0], 67 | [0.70710678, 1.56751612, 2., 1.56751612, 0.70710678]), 68 | ([0, 1, 2, 1, 0], 69 | [0.70710678, 1.56751612, 2., 1.56751612, 0.70710678]), 70 | ([[0, 1], [0, 1]], 71 | [[0., 1.], [0., 1.]]), 72 | ([[0, 1, 0], [2, 3, 0]], 73 | [[0.5, 1., 0.5], [2.5, 3.16227766, 1.5]]), 74 | ]) 75 | def test_hilbert_env_on_2d_array_with_last_dimension(x, target): 76 | env = inner.hilbert_envelope(x) 77 | np.testing.assert_allclose(env, target, err_msg="Input was {}".format(x)) 78 | 79 | 80 | def test_envelope_extraction(): 81 | x = np.array( 82 | [-0.00032745, -0.00031198, -0.00029605, -0.00027965, -0.00026281, 83 | -0.00024553, -0.00022783, -0.00020972]) 84 | target = 
np.array( 85 | [0.00068165, 0.00068556, 0.00068946, 0.00069335, 0.00069725, 86 | 0.00070113, 0.00070502, 0.0007089]) 87 | envelope = inner.hilbert_envelope(x) 88 | np.testing.assert_allclose(envelope, target, atol=1e-3) 89 | -------------------------------------------------------------------------------- /pambox/tests/test_mrsepsm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import os.path 4 | 5 | import numpy as np 6 | from numpy.testing import assert_allclose 7 | import scipy.io as sio 8 | from six.moves import zip 9 | import pytest 10 | 11 | from pambox.speech import MrSepsm 12 | 13 | 14 | __DATA_ROOT__ = os.path.join(os.path.dirname(__file__), 'data') 15 | 16 | 17 | @pytest.fixture 18 | def mr(): 19 | return MrSepsm() 20 | 21 | 22 | @pytest.fixture 23 | def mat(): 24 | return sio.loadmat(__DATA_ROOT__ + '/test_mr_sepsm_snrenv_mr_v1.mat', 25 | squeeze_me=True) 26 | 27 | 28 | def test_mr_sepsm_mr_env_powers(mr): 29 | mat = sio.loadmat(__DATA_ROOT__ + '/test_mr_sepsm_mr_env_powers.mat', 30 | squeeze_me=True) 31 | channel_env = mat['env'].T[0] 32 | channel_env = channel_env[np.newaxis, np.newaxis, :] 33 | mod_channel_envs = mat['mod_channel_envs'].T[0] 34 | mod_channel_envs = mod_channel_envs[np.newaxis, np.newaxis, :, :] 35 | 36 | mr_env_powers = mr._mr_env_powers(channel_env, mod_channel_envs) 37 | for d, target in zip(mr_env_powers[0, 0], 38 | mat['mr_env_powers']): 39 | assert_allclose(d.compressed(), target[0]) 40 | 41 | 42 | def test_mr_snr_env(mr, mat): 43 | """Test calculation of SNRenv for a given channel 44 | """ 45 | mat = mat 46 | mat_mix = sio.loadmat(__DATA_ROOT__ + '/test_mr_sepsm_mr_snr_env_mix.mat') 47 | mat_noise = sio.loadmat(__DATA_ROOT__ + 48 | '/test_mr_sepsm_mr_snr_env_noise.mat') 49 | od_mix = np.ma.MaskedArray(mat_mix['data'], mat_mix['mask']) 50 | od_noise = np.ma.MaskedArray(mat_noise['data'], 
mat_noise['mask']) 51 | mr_snr_env, exc_ptns = mr._mr_snr_env(od_mix, od_noise) 52 | time_av_snr_env = mr._time_average(mr_snr_env) 53 | assert_allclose(time_av_snr_env, mat['timeAvg_SNRenvs']) 54 | 55 | 56 | def test_mr_sepsm_time_averaging_of_short_term_snr_env(mat): 57 | """Test the averaging of the multi-resolution representation of the SNRenv 58 | 59 | Given the OrderedDict of multi-resolution SNRenv values, for a given 60 | channel, average it over time. 61 | """ 62 | in_mat = sio.loadmat(__DATA_ROOT__ + 63 | '/test_mr_sepsm_time_average_snr.mat', 64 | squeeze_me=True) 65 | mr_snr_env = np.ma.MaskedArray(in_mat['data'], in_mat['mask']) 66 | 67 | mr = MrSepsm() 68 | t_av = mr._time_average(mr_snr_env) 69 | assert_allclose(t_av, mat['timeAvg_SNRenvs']) 70 | 71 | 72 | @pytest.mark.slow 73 | def test_complete_mr_sepsm(mr): 74 | mat_complete = sio.loadmat(__DATA_ROOT__ + 75 | '/test_mr_sepsm_full.mat', 76 | squeeze_me=True) 77 | """Test the prediction by the mr-sEPSM 78 | """ 79 | mix = mat_complete['mix'] 80 | noise = mat_complete['noise'] 81 | tests = ( 82 | (mix, noise, 17.15), 83 | ) 84 | 85 | for mix, noise, target in tests: 86 | res = mr.predict(mix, mix, noise) 87 | 88 | assert_allclose( 89 | res['p']['snr_env'] 90 | , target 91 | , rtol=0.01 92 | ) 93 | -------------------------------------------------------------------------------- /pambox/tests/test_sepsm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import csv 4 | import os.path 5 | 6 | import numpy as np 7 | from numpy.testing import assert_allclose, assert_array_equal 8 | import scipy.io as sio 9 | import pytest 10 | 11 | from pambox.speech import sepsm 12 | 13 | 14 | __DATA_ROOT__ = os.path.join(os.path.dirname(__file__), 'data') 15 | 16 | 17 | def test_select_bands_above_threshold(): 18 | center_f = np.asarray([63, 80, 100, 125, 160, 200, 250, 315, 400, 500, 19 
| 630, 800, 1000, 1250, 1600, 2000, 2500, 3150, 4000, 20 | 5000, 6300, 8000]) 21 | noise_rms = [142.598279903563, 596.254784935965, 1319.07476787393, 22 | 1931.80860942992, 2180.13918820141, 1714.49937340166, 23 | 2009.77926719000, 1130.48579025285, 820.432762207735, 24 | 1006.49592779826, 1523.47513285058, 921.921756875459, 25 | 791.901475253190, 1508.59965109920, 825.572455447266, 26 | 657.161350227808, 626.333420574852, 474.950833753788, 27 | 331.591691820142, 206.744689750152, 491.003492858161, 28 | 297.383958806200] 29 | target = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 30 | 20, 21, 22] 31 | c = sepsm.Sepsm(cf=center_f) 32 | bands_above_thres = c._bands_above_thres(noise_rms) 33 | # Make 1-based to compare with matlab 34 | bands_above_thres += 1 35 | assert_array_equal(bands_above_thres, target) 36 | 37 | 38 | @pytest.fixture 39 | def model(): 40 | return sepsm.Sepsm() 41 | 42 | 43 | @pytest.mark.parametrize("target, p_mix, p_noise", [ 44 | (0.001, (0., ), (0., )), 45 | (0.001, (0.0001,), (0,)), 46 | (0.001, (0.01,), (1,)), 47 | (0.001, (0, 0), (0, 0)), 48 | (0.001, ((0, 0), (0, 0)), ((0, 0), (0, 0))), 49 | (0.001, ((0, 0), (0, 0)), ((0, 0), (0, 0))), 50 | ]) 51 | def test_snr_env(model, target, p_mix, p_noise): 52 | snrenv, _ = model._snr_env(p_mix, p_noise) 53 | assert_allclose(snrenv, target) 54 | 55 | 56 | @pytest.fixture 57 | def mix_and_noise_snr_min9_db(): 58 | with open(os.path.join(__DATA_ROOT__, 'test_full_sepsm.csv')) as csv_file: 59 | data_file = csv.reader(csv_file) 60 | temp = next(data_file) 61 | n_samples = int(temp[0]) 62 | mix = np.empty(n_samples) 63 | noise = np.empty(n_samples) 64 | 65 | for i, (m, n) in enumerate(data_file): 66 | mix[i] = np.asarray(m, dtype=np.float) 67 | noise[i] = np.asarray(n, dtype=np.float) 68 | return mix, noise 69 | 70 | 71 | def test_sepsm_prediction_snr_min9_db(model, mix_and_noise_snr_min9_db): 72 | mix, noise = mix_and_noise_snr_min9_db 73 | 74 | target_snr_env = 9.57297 75 | 76 | res = 
model.predict(mix, mix, noise) 77 | assert_allclose(target_snr_env, res['p']['snr_env'], rtol=1e-3) 78 | 79 | 80 | @pytest.mark.slow 81 | def test_sepsm_predictions_snr_0_kappa_0_8(model): 82 | mat = sio.loadmat(__DATA_ROOT__ + '/test_sepsm_spec_sub_0dB_kappa_0_8.mat', 83 | squeeze_me=True, struct_as_record=False) 84 | for ii in range(3): 85 | mix = mat['mixtures'][ii] 86 | noise = mat['noises'][ii] 87 | target = mat['results'][ii].SNRenv 88 | res = model.predict(mix, mix, noise) 89 | assert_allclose(target, res['p']['snr_env'], rtol=8e-2) 90 | -------------------------------------------------------------------------------- /pambox/tests/test_sii.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import os.path 4 | 5 | import numpy as np 6 | from numpy.testing import assert_allclose 7 | from pandas import read_csv 8 | import pytest 9 | 10 | from pambox.speech.sii import SII 11 | 12 | 13 | __DATA_ROOT__ = os.path.join(os.path.dirname(__file__), 'data') 14 | 15 | 16 | @pytest.fixture(scope='module') 17 | def data(): 18 | d = read_csv(__DATA_ROOT__ + '/test_sii.csv') 19 | return list(d.itertuples()) 20 | 21 | 22 | @pytest.mark.parametrize('_, E, N, T, I, target', data()) 23 | def test_sii(_, E, N, T, I, target): 24 | """@todo: Docstring for test_sii. 
25 | :returns: @todo 26 | 27 | """ 28 | s = SII(T=T * np.ones(18), I=I) 29 | ss = s.predict_spec(clean=E*np.ones(18), noise=N*np.ones(18)) 30 | assert_allclose(ss['p']['sii'], target, rtol=1e-4) 31 | 32 | -------------------------------------------------------------------------------- /pambox/tests/test_speech_material.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import os.path 4 | 5 | import numpy as np 6 | from numpy.testing import assert_allclose, dec, TestCase 7 | 8 | from pambox.speech import material 9 | from pambox import utils 10 | 11 | __DATA_ROOT__ = os.path.join(os.path.dirname(__file__), 'data') 12 | 13 | 14 | def test_set_level(): 15 | # Set the reference level to 100 dB 16 | ref_level = 100 17 | c = material.Material(path_to_ssn=os.path.join(__DATA_ROOT__, 18 | 'dummy_ssn.wav'), 19 | ref_level=ref_level) 20 | # But actually create a 97 dB signal. 21 | sentence_level = 97 # dB SPL 22 | target = sentence_level 23 | x = np.random.randn(100) 24 | x = utils.setdbspl(x, sentence_level) 25 | # So when setting the level to the reference, we should get the actual 26 | # sentence level. 
27 | level = utils.dbspl(c.set_level(x, ref_level)) 28 | assert_allclose(level, target) 29 | 30 | # Now if we set a target different from the reference level 31 | sentence_level = 97 32 | x = utils.setdbspl(x, sentence_level) 33 | level = utils.dbspl(c.set_level(x, ref_level + 3)) 34 | assert_allclose(level, sentence_level + 3) 35 | -------------------------------------------------------------------------------- /pambox/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import os.path 4 | 5 | import numpy as np 6 | from numpy.testing import assert_allclose 7 | from scipy import signal 8 | import pytest 9 | 10 | from pambox import utils 11 | from pambox.utils import fftfilt 12 | 13 | 14 | __DATA_ROOT__ = os.path.join(os.path.dirname(__file__), 'data') 15 | 16 | 17 | @pytest.mark.parametrize('x, ac, offset, axis, target', [ 18 | ([0], True, 0, -1, -np.inf), 19 | ([1], False, 0, -1, 0), 20 | ([1], False, 100, -1, 100), 21 | ([1], True, 0, -1, -np.inf), 22 | ([10], False, 0, -1, 20), 23 | ([10, 10], False, 0, -1, 20), 24 | ([10, 10], False, 0, 1, [20, 20]), 25 | ]) 26 | def test_dbspl(x, ac, offset, axis, target): 27 | assert_allclose(utils.dbspl(x, ac=ac, offset=offset, 28 | axis=axis), target) 29 | 30 | 31 | @pytest.mark.parametrize('x, ac, axis, target', [ 32 | ([0, 1, 2, 3, 4, 5, 6], True, -1, 2), 33 | ([[0, 1, 2, 3, 4, 5, 6]], True, 0, [0, 0, 0, 0, 0, 0, 0]), 34 | ([[0, 1, 2, 3, 4, 5, 6]], True, 1, 2), 35 | ([[0, 1, 2, 3, 4, 5, 6], [0, 1, 2, 3, 4, 5, 6]], True, -1, [2, 2]), 36 | ([0, 1, 2, 3, 4, 5, 6], False, -1, 3.60555128), 37 | ([[0, 1, 2, 3, 4, 5, 6], [0, 1, 2, 3, 4, 5, 6]], False, -1, 38 | [3.60555128, 3.60555128]), 39 | ]) 40 | def test_rms_do_ac(x, ac, axis, target): 41 | out = utils.rms(x, ac=ac, axis=axis) 42 | assert_allclose(out, target) 43 | 44 | 45 | @pytest.mark.parametrize('x, ac, axis, target', [ 46 
# Each case: (signal, AC-coupling flag, axis, expected RMS).
RMS_CASES = [
    ([0], True, -1, 0),
    ([1], True, -1, 0),
    ([1], False, -1, 1),
    ([-1], False, -1, 1),
    ([-1], True, -1, 0),
    ([10, 10], False, -1, 10),
    ([10, 10], True, -1, 0),
    ([[0, 1], [0, 1]], True, -1, [0.5, 0.5]),
    ([[0, 1], [0, 1]], False, -1, [0.70710678, 0.70710678]),
    ([[0, 1], [0, 1]], True, 0, [0, 0]),
    ([[0, 1], [0, 1]], False, 0, [0, 1]),
    ([[0, 1], [0, 1]], True, 1, [0.5, 0.5]),
    ([[0, 1], [0, 1]], False, 1, [0.70710678, 0.70710678]),
]


@pytest.mark.parametrize('x, ac, axis, target', RMS_CASES)
def test_rms(x, ac, axis, target):
    """rms handles DC removal, negative samples and both axes."""
    assert_allclose(utils.rms(x, ac=ac, axis=axis), target)


@pytest.mark.parametrize("x, level, offset, target", [
    ((0, 1), 65, 100, (0., 0.02514867)),
    ((0, 1), 65, 0, (0., 2514.86685937)),
    ((0, 1), 100, 100, (0., 1.41421356)),
])
def test_set_level(x, level, offset, target):
    """setdbspl scales the signal to the requested SPL."""
    scaled = utils.setdbspl(x, level, offset=offset)
    assert_allclose(scaled, target, atol=1e-4)


# Can't be done programmatically, because the exact third-octave spacing is
# not exactly the same as the one commonly used.
@pytest.mark.xfail(run=False, reason="Real 3rd-oct != common ones")
def test_third_oct_center_freq_bet_63_12500_hz():
    """Center frequencies of third-octave filters between 63 and 12500 Hz."""
    center_f = (63, 80, 100, 125, 160, 200, 250, 315, 400, 500, 630, 800,
                1000, 1250, 1600, 2000, 2500, 3150, 4000, 5000, 6300, 8000)
    assert utils.noctave_center_freq(63, 12500, width=3) == center_f


def test_find_calculate_srt_when_found():
    """int2srt interpolates the SRT when the target is crossed."""
    snrs = np.arange(10)
    # 20 * 2.3 + 4 == 50, so the SRT falls between samples.
    assert utils.int2srt(snrs, 20 * snrs + 4, srt_at=50) == 2.3


def test_find_calculate_srt_when_not_found():
    """int2srt returns NaN when the target level is never reached."""
    snrs = np.arange(10)
    assert np.isnan(utils.int2srt(snrs, 2 * snrs + 4, srt_at=50))


def test_find_srt_when_srt_at_index_zero():
    """The SRT may fall exactly on the first data point."""
    assert utils.int2srt([0, 1], [50, 51], srt_at=50) == 0


@pytest.mark.parametrize("inputs, targets", [
    (([1], [1, 1]), ([1, 0], [1, 1])),
    (([1, 1], [1, 1]), ([1, 1], [1, 1])),
    (([1, 1], [1]), ([1, 1], [1, 0])),
    (([1], [1, 1], False), ([1], [1])),
])
def test_make_same_length_with_padding(inputs, targets):
    """make_same_length zero-pads (or, without padding, trims) inputs."""
    assert_allclose(utils.make_same_length(*inputs), targets)


def test_psy_fn():
    """psy_fn evaluates the psychometric function at a single point."""
    # Expected value equals 100 * standard-normal CDF at -3, which suggests
    # the function returns a percentage — inferred from the constant only.
    expected = 0.13498980316300957
    assert_allclose(utils.psy_fn(-3.0, 0., 1.0), expected)
class _TestFFTFilt(object):
    """Shared checks for :func:`pambox.utils.fftfilt`.

    Concrete subclasses set ``dt`` to the dtype under test.  Expected
    outputs are the results of equivalent time-domain FIR filtering
    (``scipy.signal.lfilter``) for the same coefficients.
    """

    # dtype under test; set by concrete subclasses below.
    dt = None

    def _skip_if_unsupported(self):
        # fftpack cannot handle extended-precision types.  The original
        # code executed ``pytest.mark.skipif(...)`` as a bare statement,
        # which only *creates* a marker object and never skips anything;
        # skip explicitly at runtime instead.  ``np.clongdouble`` is the
        # portable spelling of the old ``np.longcomplex`` alias.
        if self.dt in (np.longdouble, np.clongdouble):
            pytest.skip("Type %s is not supported by fftpack" % self.dt)

    def test_fftfilt(self):
        """fftfilt must agree with lfilter on a long random signal."""
        dt = 1e-6
        fs = 1 / dt
        np.random.seed(1234)  # deterministic test data
        u = np.random.rand(10 ** 6)
        f = 10 ** 4
        b = signal.firwin(50, f / fs)

        u_lfilter = signal.lfilter(b, 1, u)
        u_fftfilt = fftfilt(b, u)
        assert_allclose(u_lfilter, u_fftfilt)

    def test_rank1(self):
        """1-D signal, with default and explicit FFT lengths."""
        self._skip_if_unsupported()

        x = np.arange(6).astype(self.dt)

        # Simple FIR.
        b = np.array([1, 1]).astype(self.dt)
        y_r = np.array([0, 1, 3, 5, 7, 9.]).astype(self.dt)
        assert_allclose(fftfilt(b, x), y_r, atol=1e-6)

        # Same FIR with an explicit FFT length...
        assert_allclose(fftfilt(b, x, 12), y_r, atol=1e-6)

        # ...and with an FFT length that is a power of 2.
        assert_allclose(fftfilt(b, x, 32), y_r, atol=1e-6)

        # FIR as long as the signal.
        b = np.ones(6).astype(self.dt)
        y_r = np.array([0, 1, 3, 6, 10, 15]).astype(self.dt)
        assert_allclose(fftfilt(b, x), y_r, atol=1e-6)

    def test_rank2_x_longer_than_b(self):
        """2-D signal filtered row-wise by a single short FIR."""
        self._skip_if_unsupported()

        shape = (4, 3)
        x = np.linspace(0, np.prod(shape) - 1, np.prod(shape)).reshape(shape)
        x = x.astype(self.dt)

        b = np.array([1, 1]).astype(self.dt)

        y_r2 = np.array([[0, 1, 3], [3, 7, 9], [6, 13, 15], [9, 19, 21]],
                        dtype=self.dt)

        assert_allclose(fftfilt(b, x), y_r2)

    def test_rank2_b_longer_than_x(self):
        """2-D signal with an FIR longer than each row."""
        self._skip_if_unsupported()

        shape = (4, 3)
        x = np.linspace(0, np.prod(shape) - 1, np.prod(shape)).reshape(shape)
        x = x.astype(self.dt)

        b = np.array([1, 1, 1, 1]).astype(self.dt)

        y_r2 = np.array([[0, 1, 3], [3, 7, 12], [6, 13, 21], [9, 19, 30]],
                        dtype=self.dt)

        assert_allclose(fftfilt(b, x), y_r2, atol=1e-6)

    def test_b_rank2(self):
        """2-D coefficients: each row of ``b`` filters the 1-D signal."""
        self._skip_if_unsupported()

        x = np.linspace(0, 5, 6).astype(self.dt)

        b = np.array([[1, 1], [2, 2]]).astype(self.dt)
        y_r2 = np.array([[0, 1, 3, 5, 7, 9], [0, 2, 6, 10, 14, 18]],
                        dtype=self.dt)
        assert_allclose(fftfilt(b, x), y_r2)

        b = np.array([[1, 1], [2, 2], [3, 3]]).astype(self.dt)
        y_r2 = np.array([[0, 1, 3, 5, 7, 9],
                         [0, 2, 6, 10, 14, 18],
                         [0, 3, 9, 15, 21, 27]],
                        dtype=self.dt)
        assert_allclose(fftfilt(b, x), y_r2, atol=1e-6)

    def test_b_and_x_of_same_dim(self):
        """2-D signal and 2-D coefficients: ``b[i]`` filters ``x[i]``."""
        self._skip_if_unsupported()

        shape = (2, 5)
        x = np.linspace(0, np.prod(shape) - 1, np.prod(shape)).reshape(shape)
        x = x.astype(self.dt)

        b = np.array([[1, 1], [2, 2]]).astype(self.dt)

        y_r2 = np.array([[0, 1, 3, 5, 7], [10, 22, 26, 30, 34]],
                        dtype=self.dt)
        assert_allclose(fftfilt(b, x), y_r2, atol=1e-6)


class TestFFTFiltFloat32(_TestFFTFilt):
    # Single precision.
    dt = np.float32


class TestFFTFiltFloat64(_TestFFTFilt):
    # Double precision.
    dt = np.float64


def test_hilbert():
    """utils.hilbert must match scipy.signal.hilbert."""
    np.random.seed(1234)  # deterministic test data
    x = np.random.randn(100)
    assert_allclose(utils.hilbert(x), signal.hilbert(x))
def read(*parts):
    """Return the text of the file at *parts*, relative to this file.

    Intentionally *not* adding an encoding option to open.
    (Note: this file previously defined ``read`` twice, identically;
    a single definition is sufficient.)
    """
    return codecs.open(os.path.join(here, *parts), 'r').read()


def find_version(*file_paths):
    """Extract the ``__version__`` string from the file at *file_paths*.

    Raises
    ------
    RuntimeError
        If no ``__version__ = '...'`` assignment is found.
    """
    version_file = read(*file_paths)
    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                              version_file, re.M)
    if version_match:
        return version_match.group(1)
    raise RuntimeError("Unable to find version string.")


long_description = read('README.rst')


def check_dependencies():
    """Fail early with a clear message when a hard dependency is missing."""
    # Skip the check on Read the Docs builds, where the heavy scientific
    # stack is not installed.
    on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
    if on_rtd:
        return

    # Just make sure dependencies exist, I haven't rigorously
    # tested what the minimal versions that will work are
    # (help on that would be awesome)
    for package in ('numpy', 'scipy', 'matplotlib', 'pandas'):
        try:
            __import__(package)
        except ImportError:
            raise ImportError("pambox requires %s" % package)


class PyTest(TestCommand):
    """Run the test suite via ``python setup.py test``."""

    def finalize_options(self):
        TestCommand.finalize_options(self)
        self.test_args = ['--runslow', 'pambox/tests']
        self.test_suite = True

    def run_tests(self):
        # Bug fix: ``sys`` was previously only imported inside the
        # ``__main__`` guard, so ``sys.exit`` here depended on the module
        # being executed as a script.  Import it locally instead.
        import sys
        import pytest
        errcode = pytest.main(self.test_args)
        sys.exit(errcode)


if __name__ == '__main__':

    import sys
    # Skip the dependency check for purely informational commands.
    if not (len(sys.argv) >= 2 and ('--help' in sys.argv[1:] or
            sys.argv[1] in ('--help-commands', 'egg_info', '--version',
                            'clean'))):
        check_dependencies()

    setup(
        name='pambox',
        description='A Python toolbox for auditory modeling',
        author='Alexandre Chabot-Leclerc',
        author_email='pambox@alex.alexchabot.net',
        version=find_version('pambox', '__init__.py'),
        url='https://bitbucket.org/achabotl/pambox',
        license='Modified BSD License',
        tests_require=['pytest'],
        install_requires=[
            'six>=1.4.1',
        ],
        cmdclass={'test': PyTest},
        long_description=long_description,
        # Bug fix: ``packages=['pambox']`` alone installed only the
        # top-level package and silently dropped the ``central`` and
        # ``speech`` subpackages (and the test package named by
        # ``test_suite``), breaking installed distributions.
        packages=[
            'pambox',
            'pambox.central',
            'pambox.speech',
            'pambox.tests',
        ],
        include_package_data=True,
        platforms='any',
        test_suite='pambox.tests',
        classifiers=[
            'Development Status :: 3 - Alpha',
            'Intended Audience :: Science/Research',
            'License :: OSI Approved :: BSD License',
            'Natural Language :: English',
            'Programming Language :: Python :: 2.7',
            'Programming Language :: Python :: 3.4',
            'Topic :: Scientific/Engineering',
            'Operating System :: POSIX',
            'Operating System :: Unix',
            'Operating System :: MacOS'
        ],
        extras_require={
            'testing': ['pytest']
        }
    )
--------------------------------------------------------------------------------