├── .github
│   └── FUNDING.yml
├── .gitignore
├── .travis.yml
├── AlternetiveTravisCI
├── CONTRIBUTING.rst
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.rst
├── __init__.py
├── _config.yml
├── _images
│   ├── Speech_GIF.gif
│   ├── follow-twitter.gif
│   ├── pipeline.jpg
│   ├── readme.rst
│   ├── speech.gif
│   ├── speechpy_logo.gif
│   └── stackframes.png
├── docs
│   ├── Makefile
│   ├── _config.yml
│   ├── requirements.txt
│   └── source
│       ├── _static
│       │   └── img
│       │       ├── 08063416.pdf
│       │       ├── Speech_GIF.gif
│       │       ├── installation_logo.gif
│       │       ├── installation_logo.jpg
│       │       ├── speech.gif
│       │       ├── speech.jpg
│       │       ├── speechpy_logo.gif
│       │       ├── speechpy_logo.jpg
│       │       └── stackframes.png
│       ├── _templates
│       │   ├── breadcrumbs.html
│       │   └── breadcrumbs.html~
│       ├── conf.py
│       ├── content
│       │   ├── features.rst
│       │   ├── postprocessing.rst
│       │   └── preprocessing.rst
│       ├── epilogue
│       │   ├── CONTRIBUTING.rst
│       │   ├── finalnote.rst
│       │   └── test.rst
│       ├── index.rst
│       └── intro
│           └── introductions.rst
├── example
│   ├── Alesis-Sanctuary-QCard-AcoustcBas-C2.wav
│   ├── test_local.py
│   └── test_package.py
├── paper
│   ├── paper.bib
│   ├── paper.md
│   ├── paper.pdf
│   └── test
│       ├── _imgs
│       │   ├── Scheme_of_speech_recognition_system.png
│       │   ├── packageview.png
│       │   └── travicCI.png
│       └── test.md
├── requirements.txt
├── setup.cfg
├── setup.py
├── speechpy
│   ├── __init__.py
│   ├── feature.py
│   ├── functions.py
│   └── processing.py
└── tests
    ├── Alesis-Sanctuary-QCard-AcoustcBas-C2.wav
    └── test_speechpy.py
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
github: [astorfi]
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: python
python:

- "2.7"
- "3.4"
- "3.5"

# command to install dependencies
install:
- pip install -r requirements.txt
- pip install coveralls
- pip install codecov

script:
- coverage run --omit=*.virtualenvs*,*virtualenv* example/test_package.py test
- coverage run --omit=*.virtualenvs*,*virtualenv* example/test_local.py test
- pytest tests/


after_success:
- coveralls
- codecov

sudo: enabled
dist: trusty
--------------------------------------------------------------------------------
/AlternetiveTravisCI:
--------------------------------------------------------------------------------
language: python
python:

- "2.7"
- "3.4"
- "3.5"
- "3.5-dev"  # 3.5 development branch

# command to install dependencies
install: "pip install -r requirements.txt"

# command to run tests
script: python setup.py develop

sudo: enabled
dist: trusty
--------------------------------------------------------------------------------
/CONTRIBUTING.rst:
--------------------------------------------------------------------------------

*************
Contributing
*************

When contributing to this repository, you are more than welcome to discuss your feedback with any of the owners of this repository. *For typos, please do not create a pull request. Instead, declare them in issues or email the repository owner*. For technical and conceptual questions please feel free to **directly contact the repository owner**. Before asking general questions related to the concepts and techniques provided in this project, **please make sure to read and understand its associated paper**.

Please note we have a code of conduct; please follow it in all your interactions with the project.

====================
Pull Request Process
====================

Please consider the following criteria so that you can help us more effectively:

1. The pull request is mainly expected to be a code script suggestion or improvement.
2. A pull request related to non-code-script sections is expected to make a significant difference in the documentation. Otherwise, it is expected to be announced in the issues section.
3.
Ensure any install or build dependencies are removed before the end of the layer when doing a build and creating a pull request.
4. Add comments with details of changes to the interface; this includes new environment variables, exposed ports, useful file locations and container parameters.
5. You may merge the pull request once you have the sign-off of at least one other developer. If you do not have permission to do that, you may request the owner to merge it for you once you believe all checks have passed.

============
Final Note
============

We are looking forward to your kind feedback. Please help us improve this open source project and make our work better. To contribute, please create a pull request and we will investigate it promptly. Once again, we appreciate your kind feedback and elaborate code inspections.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {2017} {Amirsina Torfi} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include README.rst
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
init:
	pip install -r requirements.txt

test:
	nosetests tests
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
.. image:: _images/speechpy_logo.gif
   :target: https://github.com/astorfi/speech_feature_extraction/blob/master/images/speechpy_logo.gif

===============================================
`SpeechPy Official Project Documentation`_
===============================================

.. image:: https://pepy.tech/badge/speechpy
   :target: https://pepy.tech/project/speechpy
.. image:: https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat
   :target: https://github.com/astorfi/speechpy/pulls
.. image:: https://coveralls.io/repos/github/astorfi/speechpy/badge.svg?branch=master
   :target: https://coveralls.io/github/astorfi/speechpy?branch=master
.. image:: https://codecov.io/gh/astorfi/speechpy/branch/master/graph/badge.svg
   :target: https://codecov.io/gh/astorfi/speechpy
.. image:: https://badge.fury.io/py/speechpy.svg
   :target: https://badge.fury.io/py/speechpy
.. image:: http://joss.theoj.org/papers/10.21105/joss.00749/status.svg
   :target: https://doi.org/10.21105/joss.00749
.. image:: https://img.shields.io/twitter/follow/amirsinatorfi.svg?label=Follow&style=social
   :target: https://twitter.com/amirsinatorfi

.. .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.810391.svg
..    :target: https://doi.org/10.5281/zenodo.810391

.. _SpeechPy Official Project Documentation: http://speechpy.readthedocs.io


==========================
Table of Contents
==========================
.. contents::
   :local:
   :depth: 3

---------------------
Documentation
---------------------

This library provides the most frequently used speech features, including MFCCs and filterbank energies, alongside the log-energy of the filterbanks.
If you are interested in what MFCCs are and how they are generated, please refer to this
`wiki `_ page.

.. image:: _images/speech.gif


Please refer to the following links for further information:

`SpeechPy Official Project Documentation`_

`Paper`_

.. _SpeechPy Official Project Documentation: http://speechpy.readthedocs.io
.. _Paper: https://doi.org/10.21105/joss.00749

------------------------------------------
Which Python versions are supported
------------------------------------------

Currently, the package has been tested and verified using Python ``2.7``, ``3.4`` and ``3.5``.

---------------------
Citation
---------------------

If you use this package, please kindly cite it as follows:
.. code:: bibtex

    @article{torfi2018speechpy,
      title={SpeechPy-A Library for Speech Processing and Recognition},
      author={Torfi, Amirsina},
      journal={arXiv preprint arXiv:1803.01094},
      year={2018}
    }

---------------------
How to Install?
---------------------

There are two ways to install this package: local installation and PyPI.

~~~~~~~~~~~~~~~~~~~
Local Installation
~~~~~~~~~~~~~~~~~~~

For local installation, the repository must first be cloned::

    git clone https://github.com/astorfi/speech_feature_extraction.git

After cloning the repository, navigate to the repository directory and execute::

    python setup.py develop

~~~~~
PyPI
~~~~~

The package is available on PyPI. For direct installation simply execute the following:

.. code-block:: shell

    pip install speechpy


------------------------------------------
What Features are supported?
------------------------------------------
- Mel Frequency Cepstral Coefficients (MFCCs)
- Filterbank Energies
- Log Filterbank Energies

Please refer to `SpeechPy Official Project Documentation`_ for details about the supported features.

~~~~~~~~~~~~~~
MFCC Features
~~~~~~~~~~~~~~

|pic1| |pic2|

.. |pic1| image:: _images/Speech_GIF.gif
   :width: 45%

.. |pic2| image:: _images/pipeline.jpg
   :width: 45%

The supported attributes for generating MFCC features can be seen by investigating the related function:

.. code-block:: python

    def mfcc(signal, sampling_frequency, frame_length=0.020, frame_stride=0.01, num_cepstral=13,
             num_filters=40, fft_length=512, low_frequency=0, high_frequency=None, dc_elimination=True):
        """Compute MFCC features from an audio signal.
        :param signal: the audio signal from which to compute features. Should be an N x 1 array.
        :param sampling_frequency: the sampling frequency of the signal we are working with.
        :param frame_length: the length of each frame in seconds. Default is 0.020s.
        :param frame_stride: the step between successive frames in seconds. Default is 0.01s.
        :param num_filters: the number of filters in the filterbank. Default is 40.
        :param fft_length: number of FFT points. Default is 512.
        :param low_frequency: lowest band edge of mel filters. In Hz, default is 0.
        :param high_frequency: highest band edge of mel filters. In Hz, default is samplerate/2.
        :param num_cepstral: number of cepstral coefficients. Default is 13.
        :param dc_elimination: whether the first DC component should be eliminated or not.
        :returns: A numpy array of size (num_frames x num_cepstral) containing MFCC features.
        """
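
As a minimal usage sketch (``sample.wav`` is a placeholder path standing in for any mono WAV file readable by ``scipy.io.wavfile``):

.. code-block:: python

    import scipy.io.wavfile as wav
    import speechpy

    # 'sample.wav' is a placeholder; replace it with a real mono WAV file.
    fs, signal = wav.read('sample.wav')

    # 13 cepstral coefficients from 20 ms frames with a 10 ms stride.
    mfcc_features = speechpy.feature.mfcc(signal, sampling_frequency=fs,
                                          frame_length=0.020, frame_stride=0.01,
                                          num_cepstral=13, num_filters=40,
                                          fft_length=512)
    print('MFCC feature shape=', mfcc_features.shape)  # (num_frames, 13)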

~~~~~~~~~~~~~~~~~~~~~~~~~~~
Filterbank Energy Features
~~~~~~~~~~~~~~~~~~~~~~~~~~~


.. code-block:: python

    def mfe(signal, sampling_frequency, frame_length=0.020, frame_stride=0.01,
            num_filters=40, fft_length=512, low_frequency=0, high_frequency=None):
        """Compute Mel-filterbank energy features from an audio signal.
        :param signal: the audio signal from which to compute features. Should be an N x 1 array.
        :param sampling_frequency: the sampling frequency of the signal we are working with.
        :param frame_length: the length of each frame in seconds. Default is 0.020s.
        :param frame_stride: the step between successive frames in seconds. Default is 0.01s.
        :param num_filters: the number of filters in the filterbank. Default is 40.
        :param fft_length: number of FFT points. Default is 512.
        :param low_frequency: lowest band edge of mel filters. In Hz, default is 0.
        :param high_frequency: highest band edge of mel filters. In Hz, default is samplerate/2.
        :returns:
                  features: the energy of the filterbank: num_frames x num_filters
                  frame_energies: the energy of each frame: num_frames x 1
        """

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Log Filterbank Energy Features
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The attributes for ``lmfe`` (log filterbank energies) are the same as those for ``mfe`` (filterbank energies).

.. code-block:: python

    def lmfe(signal, sampling_frequency, frame_length=0.020, frame_stride=0.01,
             num_filters=40, fft_length=512, low_frequency=0, high_frequency=None):
        """Compute log Mel-filterbank energy features from an audio signal.
        :param signal: the audio signal from which to compute features. Should be an N x 1 array.
        :param sampling_frequency: the sampling frequency of the signal we are working with.
        :param frame_length: the length of each frame in seconds. Default is 0.020s.
        :param frame_stride: the step between successive frames in seconds. Default is 0.01s.
        :param num_filters: the number of filters in the filterbank. Default is 40.
        :param fft_length: number of FFT points. Default is 512.
        :param low_frequency: lowest band edge of mel filters. In Hz, default is 0.
        :param high_frequency: highest band edge of mel filters. In Hz, default is samplerate/2.
        :returns:
                  features: the log energy of the filterbank: num_frames x num_filters
                  frame_log_energies: the log energy of each frame: num_frames x 1
        """
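
A hedged sketch of calling the two functions above (``sample.wav`` is again a placeholder path; per the docstring, ``mfe`` returns the filterbank energies together with the per-frame energies, while the example later in this README assigns the ``lmfe`` result to a single array):

.. code-block:: python

    import scipy.io.wavfile as wav
    import speechpy

    fs, signal = wav.read('sample.wav')  # placeholder path

    # Mel-filterbank energies plus one energy value per frame.
    features, frame_energies = speechpy.feature.mfe(signal, sampling_frequency=fs,
                                                    frame_length=0.020, frame_stride=0.01,
                                                    num_filters=40, fft_length=512)
    print('filterbank energies shape=', features.shape)  # (num_frames, 40)

    # The log variant takes the same parameters.
    log_features = speechpy.feature.lmfe(signal, sampling_frequency=fs,
                                         frame_length=0.020, frame_stride=0.01,
                                         num_filters=40, fft_length=512)
    print('log filterbank energies shape=', log_features.shape)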

~~~~~~~~~~~~
Stack Frames
~~~~~~~~~~~~

The ``stack_frames`` function generates the stack of frames from the signal.

.. code-block:: python

    def stack_frames(sig, sampling_frequency, frame_length=0.020, frame_stride=0.020,
                     filter=lambda x: numpy.ones((x,)), zero_padding=True):
        """Frame a signal into overlapping frames.
        :param sig: The audio signal to frame, of size (N,).
        :param sampling_frequency: The sampling frequency of the signal.
        :param frame_length: The length of each frame in seconds.
        :param frame_stride: The stride between successive frames in seconds.
        :param filter: The time-domain filter applied to each frame. By default it is all ones, so nothing is changed.
        :param zero_padding: If the number of samples is not a multiple of the frame length (in samples), the signal
            is zero-padded to generate the last frame.
        :returns: Array of frames, of size (number_of_frames x frame_length).
        """
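
A minimal sketch of framing a signal directly (the default all-ones filter is used, so the frames are unweighted; ``sample.wav`` is a placeholder path):

.. code-block:: python

    import scipy.io.wavfile as wav
    import speechpy

    fs, signal = wav.read('sample.wav')  # placeholder path

    # 20 ms frames with a 10 ms stride (50% overlap); the tail is zero-padded.
    frames = speechpy.processing.stack_frames(signal, sampling_frequency=fs,
                                              frame_length=0.020, frame_stride=0.010,
                                              zero_padding=True)
    print('frames shape=', frames.shape)  # (number_of_frames, samples_per_frame)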

---------------------
Post Processing
---------------------

Some post-processing operations are supported in ``speechpy``.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Global cepstral mean and variance normalization (CMVN)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This function performs global cepstral mean and variance normalization
(CMVN) to remove the channel effects. The code assumes that there is one
observation per row.

.. code-block:: python

    def cmvn(vec, variance_normalization=False):
        """
        This function performs global ``cepstral mean and variance normalization``
        (CMVN) on the input feature vector "vec". The code assumes that there is one observation per row.

        :param vec: input feature matrix (size: (num_observation, num_features)).
        :param variance_normalization: whether variance normalization should be performed or not.
        :return: The mean (or mean + variance) normalized feature vector.
        """


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Local cepstral mean and variance normalization (CMVN) over a sliding window
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This function performs local cepstral mean and variance normalization
(CMVN) over sliding windows. The code assumes that there is one
observation per row.

.. code-block:: python

    def cmvnw(vec, win_size=301, variance_normalization=False):
        """
        This function performs local cepstral mean and variance normalization (CMVN)
        on the input feature vector "vec" over a sliding window. The code assumes that there is one observation per row.

        :param vec: input feature matrix (size: (num_observation, num_features)).
        :param win_size: the size of the sliding window for local normalization; it should be odd.
            The default is 301, which is around 3s at a 100 Hz frame rate (i.e., a 10 ms frame stride).
        :param variance_normalization: whether variance normalization should be performed or not.
        :return: The mean (or mean + variance) normalized feature vector.
        """
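
As a hedged sketch, both normalizations apply to any feature matrix with one observation per row (a random matrix stands in for real features here):

.. code-block:: python

    import numpy as np
    import speechpy

    # A stand-in feature matrix: 500 frames of 13-dimensional features.
    features = np.random.randn(500, 13)

    # Global mean and variance normalization.
    features_cmvn = speechpy.processing.cmvn(features, variance_normalization=True)

    # Local normalization over a sliding window; win_size must be odd.
    features_cmvnw = speechpy.processing.cmvnw(features, win_size=301,
                                               variance_normalization=True)
    print(features_cmvn.shape, features_cmvnw.shape)  # both (500, 13)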

-----
Tests
-----

SpeechPy includes some unit tests. To run the tests, ``cd`` into the
``speechpy/tests`` directory and run:

.. code-block:: shell

    python -m pytest

The only requirement you need to install for the tests is ``pytest``.

------------
Example
------------

A test example (see the scripts in the ``example`` directory) is shown below:

.. code-block:: python

    import scipy.io.wavfile as wav
    import numpy as np
    import speechpy
    import os

    file_name = os.path.join(os.path.dirname(os.path.abspath(__file__)),'Alesis-Sanctuary-QCard-AcoustcBas-C2.wav')
    fs, signal = wav.read(file_name)
    signal = signal[:,0]

    # Example of pre-emphasizing.
    signal_preemphasized = speechpy.processing.preemphasis(signal, cof=0.98)

    # Example of stacking frames
    frames = speechpy.processing.stack_frames(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01,
                                              filter=lambda x: np.ones((x,)), zero_padding=True)

    # Example of extracting the power spectrum
    power_spectrum = speechpy.processing.power_spectrum(frames, fft_points=512)
    print('power spectrum shape=', power_spectrum.shape)

    ############# Extract MFCC features #############
    mfcc = speechpy.feature.mfcc(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01,
                                 num_filters=40, fft_length=512, low_frequency=0, high_frequency=None)
    mfcc_cmvn = speechpy.processing.cmvnw(mfcc, win_size=301, variance_normalization=True)
    print('mfcc(mean + variance normalized) feature shape=', mfcc_cmvn.shape)

    mfcc_feature_cube = speechpy.feature.extract_derivative_feature(mfcc)
    print('mfcc feature cube shape=', mfcc_feature_cube.shape)

    ############# Extract log-energy features #############
    logenergy = speechpy.feature.lmfe(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01,
                                      num_filters=40, fft_length=512, low_frequency=0, high_frequency=None)
    logenergy_feature_cube = speechpy.feature.extract_derivative_feature(logenergy)
    print('logenergy features=', logenergy.shape)

To extract the features, the signal samples are first stacked into frames. The features are then computed for each frame in the stacked-frames collection.

---------------------
Dependencies
---------------------

``SciPy`` and ``NumPy`` are the two required dependencies, and they will be installed automatically by running the ``setup.py`` file.

---------------------
Acknowledgements
---------------------

This work is based upon work supported by the Center for Identification Technology Research and the National Science Foundation under Grant #1650474.


---------------------
Contributing
---------------------

When contributing to this repository, you are more than welcome to discuss your feedback with any of the owners of this repository. *For typos, please do not create a pull request. Instead, declare them in issues or email the repository owner*. For technical and conceptual questions please feel free to **directly contact the repository owner**. Before asking general questions related to the concepts and techniques provided in this project, **please make sure to read and understand its associated paper**.

~~~~~~~~~~~~~~~~~~~~~~~~
Pull Request Process
~~~~~~~~~~~~~~~~~~~~~~~~

Please consider the following criteria so that you can help us more effectively:

1. The pull request is mainly expected to be a code script suggestion or improvement.
2. A pull request related to non-code-script sections is expected to make a significant difference in the documentation. Otherwise, it is expected to be announced in the issues section.
3. Ensure any install or build dependencies are removed before the end of the layer when doing a build and creating a pull request.
4. Add comments with details of changes to the interface; this includes new environment variables, exposed ports, useful file locations and container parameters.
5. You may merge the pull request once you have the sign-off of at least one other developer. If you do not have permission to do that, you may request the owner to merge it for you once you believe all checks have passed.

~~~~~~~~~~~~~~~~~~~~~~~~
Declaring issues
~~~~~~~~~~~~~~~~~~~~~~~~

To declare issues, you can directly email the repository owner. However, please prefer to create an issue, as other followers of the repository may encounter the same problem; that way, the question will be answered for other developers as well.

~~~~~~~~~~~~~~~~~~~~~~~~
Final Note
~~~~~~~~~~~~~~~~~~~~~~~~

We are looking forward to your kind feedback. Please help us improve this open source project and make our work better. To contribute, please create a pull request and we will investigate it promptly. Once again, we appreciate your kind feedback and elaborate code inspections.



---------------------
Disclaimer
---------------------

Although it has since undergone dramatic changes, some portion of this library was inspired by the `python speech features`_ library.

.. _python speech features: https://github.com/jameslyons/python_speech_features

We claim the following advantages for our library:

1. More accurate operations have been performed for the mel-frequency calculations.
2. The package supports different ``Python`` versions.
3. The features are generated in a more organized way, as cubic features.
4. The package is well-tested and integrated.
5. The package is up-to-date and actively developed.
6. The package has been used for research purposes.
7. Exceptions and extreme cases are handled in this library.
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/__init__.py
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
theme: jekyll-theme-architect
--------------------------------------------------------------------------------
/_images/Speech_GIF.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/_images/Speech_GIF.gif
--------------------------------------------------------------------------------
/_images/follow-twitter.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/_images/follow-twitter.gif
--------------------------------------------------------------------------------
/_images/pipeline.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/_images/pipeline.jpg
--------------------------------------------------------------------------------
/_images/readme.rst:
--------------------------------------------------------------------------------
The images used for this repository.
2 | -------------------------------------------------------------------------------- /_images/speech.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/_images/speech.gif -------------------------------------------------------------------------------- /_images/speechpy_logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/_images/speechpy_logo.gif -------------------------------------------------------------------------------- /_images/stackframes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/_images/stackframes.png -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = SpeechPy 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-time-machine -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | -e git://github.com/snide/sphinx_rtd_theme.git#egg=sphinx_rtd_theme 3 | -------------------------------------------------------------------------------- /docs/source/_static/img/08063416.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/docs/source/_static/img/08063416.pdf -------------------------------------------------------------------------------- /docs/source/_static/img/Speech_GIF.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/docs/source/_static/img/Speech_GIF.gif -------------------------------------------------------------------------------- /docs/source/_static/img/installation_logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/docs/source/_static/img/installation_logo.gif -------------------------------------------------------------------------------- /docs/source/_static/img/installation_logo.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/docs/source/_static/img/installation_logo.jpg -------------------------------------------------------------------------------- /docs/source/_static/img/speech.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/docs/source/_static/img/speech.gif -------------------------------------------------------------------------------- /docs/source/_static/img/speech.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/docs/source/_static/img/speech.jpg -------------------------------------------------------------------------------- /docs/source/_static/img/speechpy_logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/docs/source/_static/img/speechpy_logo.gif -------------------------------------------------------------------------------- /docs/source/_static/img/speechpy_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/docs/source/_static/img/speechpy_logo.jpg -------------------------------------------------------------------------------- /docs/source/_static/img/stackframes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/docs/source/_static/img/stackframes.png -------------------------------------------------------------------------------- /docs/source/_templates/breadcrumbs.html: -------------------------------------------------------------------------------- 1 | {# Support for Sphinx 1.3+ page_source_suffix, but don't break old builds. #} 2 | 3 | {% if page_source_suffix %} 4 | {% set suffix = page_source_suffix %} 5 | {% else %} 6 | {% set suffix = source_suffix %} 7 | {% endif %} 8 | 9 | {% if meta is defined and meta is not none %} 10 | {% set check_meta = True %} 11 | {% else %} 12 | {% set check_meta = False %} 13 | {% endif %} 14 | 15 | {% if check_meta and 'github_url' in meta %} 16 | {% set display_github = True %} 17 | {% endif %} 18 | 19 | {% if check_meta and 'bitbucket_url' in meta %} 20 | {% set display_bitbucket = True %} 21 | {% endif %} 22 | 23 | {% if check_meta and 'gitlab_url' in meta %} 24 | {% set display_gitlab = True %} 25 | {% endif %} 26 | 27 |
28 | 29 | 70 | 71 | {% if (theme_prev_next_buttons_location == 'top' or theme_prev_next_buttons_location == 'both') and (next or prev) %} 72 | 80 | {% endif %} 81 |
82 |
83 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # SpeechPy documentation build configuration file, created by 4 | # sphinx-quickstart on Wed Nov 22 14:40:49 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | import os 20 | import sys 21 | sys.path.insert(0, os.path.abspath('../../')) 22 | import speechpy 23 | import numpy 24 | import sphinx_rtd_theme 25 | 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | # 31 | # needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = [ 37 | 'sphinx.ext.autodoc', 38 | 'sphinx.ext.autosummary', 39 | 'sphinx.ext.doctest', 40 | 'sphinx.ext.intersphinx', 41 | 'sphinx.ext.todo', 42 | 'sphinx.ext.coverage', 43 | 'sphinx.ext.mathjax', 44 | 'sphinx.ext.napoleon', 45 | 'sphinx.ext.viewcode', 46 | # 'sphinxcontrib.googleanalytics', 47 | ] 48 | 49 | # True to use the :ivar: role for instance variables. False to use the .. attribute:: directive instead. Defaults to False. 50 | # Refer to http://www.sphinx-doc.org/en/stable/ext/napoleon.html 51 | napoleon_use_ivar = True 52 | 53 | # Add any paths that contain templates here, relative to this directory. 54 | templates_path = ['_templates'] 55 | 56 | # The suffix(es) of source filenames. 57 | # You can specify multiple suffix as a list of string: 58 | # 59 | # source_suffix = ['.rst', '.md'] 60 | source_suffix = '.rst' 61 | 62 | # The master toctree document. 63 | master_doc = 'index' 64 | 65 | # General information about the project. 66 | project = u'SpeechPy' 67 | copyright = u'2017, Amirsina Torfi' 68 | author = u'Amirsina Torfi' 69 | 70 | # The version info for the project you're documenting, acts as replacement for 71 | # |version| and |release|, also used in various other places throughout the 72 | # built documents. 73 | # 74 | version = 'master (' + '2.3.0' + ' )' 75 | # The full version, including alpha/beta/rc tags. 76 | # TODO: verify this works as expected 77 | release = 'master' 78 | 79 | # The language for content autogenerated by Sphinx. Refer to documentation 80 | # for a list of supported languages. 81 | # 82 | # This is also used if you do content translation via gettext catalogs. 83 | # Usually you set "language" from the command line for these cases. 84 | language = None 85 | 86 | # List of patterns, relative to source directory, that match files and 87 | # directories to ignore when looking for source files. 88 | # This patterns also effect to html_static_path and html_extra_path 89 | exclude_patterns = [] 90 | 91 | # The name of the Pygments (syntax highlighting) style to use. 
92 | pygments_style = 'sphinx' 93 | 94 | # If true, `todo` and `todoList` produce output, else they produce nothing. 95 | todo_include_todos = False 96 | 97 | 98 | # -- Options for HTML output ---------------------------------------------- 99 | 100 | # The theme to use for HTML and HTML Help pages. See the documentation for 101 | # a list of builtin themes. 102 | # 103 | html_theme = 'sphinx_rtd_theme' 104 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 105 | 106 | # Theme options are theme-specific and customize the look and feel of a theme 107 | # further. For a list of options available for each theme, see the 108 | # documentation. 109 | # 110 | # html_theme_options = {} 111 | 112 | html_theme_options = { 113 | 'collapse_navigation': False, 114 | 'display_version': True, 115 | 'logo_only': True, 116 | 'sticky_navigation': False 117 | } 118 | 119 | html_context = { 120 | "display_github": True, # Add 'Edit on Github' link instead of 'View page source' 121 | "last_updated": True, 122 | "commit": False, 123 | } 124 | 125 | html_logo = '_static/img/speechpy_logo.gif' 126 | 127 | # Add any paths that contain custom static files (such as style sheets) here, 128 | # relative to this directory. They are copied after the builtin static files, 129 | # so a file named "default.css" will overwrite the builtin "default.css". 130 | html_static_path = ['_static'] 131 | 132 | # -- Options for HTMLHelp output ------------------------------------------ 133 | 134 | # Output file base name for HTML help builder. 135 | htmlhelp_basename = 'SpeechPydoc' 136 | 137 | 138 | # -- Options for LaTeX output --------------------------------------------- 139 | 140 | # -- Options for LaTeX output --------------------------------------------- 141 | 142 | # latex_engine = 'pdflatex' 143 | 144 | # latex_engine = 'lualatex' 145 | # latex_elements = { 146 | 147 | # 'papersize': 'a4paper', 148 | # 'releasename':" ", 149 | # 'figure_align':'htbp', 150 | # 'pointsize': '12pt', 151 | # 'fontpkg': r''' 152 | # \setmainfont{Times New Roman} 153 | # \setsansfont{Times New Roman} 154 | # \setmonofont{Times New Roman} 155 | # ''', 156 | # 'preamble': r''' 157 | # \usepackage[titles]{tocloft} 158 | # \cftsetpnumwidth {1.25cm}\cftsetrmarg{1.5cm} 159 | # \setlength{\cftchapnumwidth}{0.75cm} 160 | # \setlength{\cftsecindent}{\cftchapnumwidth} 161 | # \setlength{\cftsecnumwidth}{1.25cm} 162 | # ''', 163 | # 'fncychap': r'\usepackage[Bjornstrup]{fncychap}', 164 | # 'printindex': r'\footnotesize\raggedright\printindex', 165 | # } 166 | 167 | 168 | 169 | 170 | latex_elements = { 171 | # The paper size ('letterpaper' or 'a4paper'). 172 | # 173 | 'papersize': 'letterpaper', 174 | 175 | # The font size ('10pt', '11pt' or '12pt'). 176 | # 177 | 'pointsize': '10pt', 178 | 179 | # Additional stuff for the LaTeX preamble. 180 | # 181 | 'preamble': '', 182 | 183 | # Latex figure (float) alignment 184 | # 185 | 'figure_align': 'htbp', 186 | } 187 | 188 | # Grouping the document tree into LaTeX files. List of tuples 189 | # (source start file, target name, title, 190 | # author, documentclass [howto, manual, or own class]). 191 | latex_documents = [ 192 | (master_doc, 'test.tex', u'test Documentation', 193 | u'test', 'manual'), 194 | ] 195 | 196 | # The name of an image file (relative to this directory) to place at the top of 197 | # the title page. 198 | # 199 | # latex_logo = None 200 | 201 | # If true, show page references after internal links. 
202 | # 203 | # latex_show_pagerefs = False 204 | 205 | # If true, show URL addresses after external links. 206 | # 207 | # latex_show_urls = False 208 | 209 | # Documents to append as an appendix to all manuals. 210 | # 211 | # latex_appendices = [] 212 | 213 | # If false, no module index is generated. 214 | # 215 | # latex_domain_indices = True 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | latex_logo = '_static/img/speechpy_logo.jpg' 232 | 233 | 234 | # latex_elements = { 235 | # # The paper size ('letterpaper' or 'a4paper'). 236 | # # 237 | # 'papersize': 'a4paper', 238 | # 'releasename':" ", 239 | # # Sonny, Lenny, Glenn, Conny, Rejne, Bjarne and Bjornstrup 240 | # # 'fncychap': '\\usepackage[Lenny]{fncychap}', 241 | # 'fncychap': '\\usepackage{fncychap}', 242 | # # 'fontpkg': ' ', 243 | # 244 | # 'figure_align':'htbp', 245 | # # The font size ('10pt', '11pt' or '12pt'). 246 | # # 247 | # 'pointsize': '14pt', 248 | # 249 | # # Additional stuff for the LaTeX preamble. 250 | # 251 | # # 'preamble': r''' 252 | # # %%%%%%%%%%%%%%%%%%%% Sina %%%%%%%%%%%%%%%%%% 253 | # # %%%add number to subsubsection 2=subsection, 3=subsubsection 254 | # # %%% below subsubsection is not good idea. 255 | # # \setcounter{secnumdepth}{3} 256 | # # % 257 | # # %%%% Table of content upto 2=subsection, 3=subsubsection 258 | # # %\setcounter{tocdepth}{2} 259 | # # 260 | # # \usepackage{amsmath,amsfonts,amssymb,amsthm} 261 | # # \usepackage{graphicx} 262 | # # 263 | # # %\usepackage{minted} 264 | # # %\fvset{breaklines=true} 265 | # # 266 | # # %%% reduce spaces for Table of contents, figures and tables 267 | # # %%% it is used "\addtocontents{toc}{\vskip -1.2cm}" etc. in the document 268 | # # \usepackage[notlot,nottoc,notlof]{} 269 | # # 270 | # # \usepackage{color} 271 | # # \usepackage{transparent} 272 | # # \usepackage{eso-pic} 273 | # # \usepackage{lipsum} 274 | # # 275 | # # \usepackage{footnotebackref} %%link at the footnote to go to the place of footnote in the text 276 | # # 277 | # # %% spacing between line 278 | # # \usepackage{setspace} 279 | # # %%%%\onehalfspacing 280 | # # %%%%\doublespacing 281 | # # %\singlespacing 282 | # # 283 | # # 284 | # # %%%%%%%%%%% datetime 285 | # # \usepackage{datetime} 286 | # # 287 | # # \newdateformat{MonthYearFormat}{% 288 | # # \monthname[\THEMONTH], \THEYEAR} 289 | # # 290 | # # 291 | # # %% RO, LE will not work for 'oneside' layout. 292 | # # %% Change oneside to twoside in document class 293 | # # %\usepackage{fancyhdr} 294 | # # %\pagestyle{fancy} 295 | # # %\fancyhf{} 296 | # # 297 | # # %%% Alternating Header for oneside 298 | # # %\fancyhead[L]{\ifthenelse{\isodd{\value{page}}}{ \small \nouppercase{\leftmark} }{}} 299 | # # %\fancyhead[R]{\ifthenelse{\isodd{\value{page}}}{}{ \small \nouppercase{\rightmark} }} 300 | # # 301 | # # %%% Alternating Header for two side 302 | # # %\fancyhead[RO]{\small \nouppercase{\rightmark}} 303 | # # %\fancyhead[LE]{\small \nouppercase{\leftmark}} 304 | # # 305 | # # %% for oneside: change footer at right side. If you want to use Left and right then use same as header defined above. 
306 | # # %\fancyfoot[R]{\ifthenelse{\isodd{\value{page}}}{{\tiny Amirsina Torfi} }{\href{https://github.com/astorfi/speechpy}{\tiny SpeechPy}}} 307 | # # 308 | # # %%% Alternating Footer for two side 309 | # # %\fancyfoot[RO, RE]{\scriptsize Amirsina Torfi (amirsina.torfi@gmail.com)} 310 | # # 311 | # # %%% page number 312 | # # %\fancyfoot[CO, CE]{\thepage} 313 | # # 314 | # # %\renewcommand{\headrulewidth}{0.5pt} 315 | # # %\renewcommand{\footrulewidth}{0.5pt} 316 | # # 317 | # # %\RequirePackage{tocbibind} %%% comment this to remove page number for following 318 | # # %\addto\captionsenglish{\renewcommand{\contentsname}{Table of contents}} 319 | # # %\addto\captionsenglish{\renewcommand{\listfigurename}{List of figures}} 320 | # # %\addto\captionsenglish{\renewcommand{\listtablename}{List of tables}} 321 | # # %\addto\captionsenglish{\renewcommand{\listtablename}{List of tables}} %%% Heading for TOC 322 | # # 323 | # # 324 | # # %%reduce spacing for itemize 325 | # # \usepackage{enumitem} 326 | # # %\setlist{nosep} 327 | # # 328 | # # %%%%%%%%%%% Quote Styles at the top of chapter 329 | # # %\usepackage{epigraph} 330 | # # %\setlength{\epigraphwidth}{0.8\columnwidth} 331 | # # %\newcommand{\chapterquote}[2]{\epigraphhead[60]{\epigraph{\textit{#1}}{\textbf {\textit{--#2}}}}} 332 | # # %%%%%%%%%%% Quote for all places except Chapter 333 | # # %\newcommand{\sectionquote}[2]{{\quote{\textit{``#1''}}{\textbf {\textit{--#2}}}}} 334 | # # ''', 335 | # # 336 | # # 337 | # # 'maketitle': r''' 338 | # # \pagenumbering{Roman} %%% to avoid page 1 conflict with actual page 1 339 | # # 340 | # # \begin{titlepage} 341 | # # \centering 342 | # # 343 | # # \vspace*{40mm} %%% * is used to give space from top 344 | # # \textbf{\Huge {SpeechPy: Speech Recognition Library}} 345 | # # 346 | # # \vspace{0mm} 347 | # # \begin{figure}[!h] 348 | # # \centering 349 | # # \includegraphics[scale=0.8]{speechpy_logo.jpg} 350 | # # \end{figure} 351 | # # 352 | # # \vspace{0mm} 353 | # # \Large \textbf{{Amirsina Torfi}} 354 | # # 355 | # # % \small Created on : Octorber, 2017 356 | # # 357 | # # \vspace*{0mm} 358 | # # \small Last updated : \MonthYearFormat\today 359 | # # 360 | # # 361 | # # %% \vfill adds at the bottom 362 | # # \vfill 363 | # # \small \textit{Please refer to project repository at }{\href{https://github.com/astorfi/speechpy}{SpeechPy}} 364 | # # \end{titlepage} 365 | # # 366 | # # \clearpage 367 | # # \pagenumbering{roman} 368 | # # \tableofcontents 369 | # # % \listoffigures 370 | # # % \listoftables 371 | # # \clearpage 372 | # # \pagenumbering{english} 373 | # # 374 | # # ''', 375 | # # Latex figure (float) alignment 376 | # # 377 | # # 'figure_align': 'htbp', 378 | # # 'sphinxsetup': \ 379 | # # #'hmargin={0.7in,0.7in}, vmargin={1in,1in}, \ 380 | # # 'verbatimwithframe=true, \ 381 | # # TitleColor={rgb}{0,0,0}', 382 | # # 'tableofcontents':' ', 383 | # 384 | # } 385 | 386 | 387 | 388 | # Grouping the document tree into LaTeX files. List of tuples 389 | # (source start file, target name, title, 390 | # author, documentclass [howto, manual, or own class]). 391 | latex_documents = [ 392 | (master_doc, 'speechpy.tex', 'SpeechPy Documentation', 393 | 'Amirsina Torfi', 'manual'), 394 | ] 395 | 396 | 397 | # -- Options for manual page output --------------------------------------- 398 | 399 | # One entry per manual page. List of tuples 400 | # (source start file, name, description, authors, manual section). 
401 | man_pages = [ 402 | (master_doc, 'speechpy', u'SpeechPy Documentation', 403 | [author], 1) 404 | ] 405 | 406 | 407 | # -- Options for Texinfo output ------------------------------------------- 408 | 409 | # Grouping the document tree into Texinfo files. List of tuples 410 | # (source start file, target name, title, author, 411 | # dir menu entry, description, category) 412 | texinfo_documents = [ 413 | (master_doc, 'SpeechPy', u'SpeechPy Documentation', 414 | author, 'SpeechPy', 'A library for Speech Recognition and Feature Extraction.', 415 | 'Miscellaneous'), 416 | ] 417 | 418 | 419 | # Example configuration for intersphinx: refer to the Python standard library. 420 | intersphinx_mapping = { 421 | 'python': ('https://docs.python.org/3/', None), 422 | 'numpy': ('https://docs.scipy.org/doc/numpy/', None), 423 | } 424 | 425 | 426 | -------------------------------------------------------------------------------- /docs/source/content/features.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | Features 5 | ========= 6 | 7 | .. automodule:: speechpy.feature 8 | .. currentmodule:: speechpy.feature 9 | 10 | 11 | :hidden:`MFCC` 12 | ~~~~~~~~~~~~~~ 13 | 14 | .. autofunction:: speechpy.feature.mfcc 15 | 16 | 17 | :hidden:`Mel Frequency Energy` 18 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 | 20 | .. autofunction:: speechpy.feature.mfe 21 | 22 | 23 | :hidden:`Log Mel Frequency Energy` 24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. autofunction:: speechpy.feature.lmfe 27 | 28 | 29 | :hidden:`Extract Derivative Features` 30 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 31 | 32 | .. autofunction:: speechpy.feature.extract_derivative_feature 33 | -------------------------------------------------------------------------------- /docs/source/content/postprocessing.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | postprocessing 6 | ============== 7 | 8 | .. automodule:: speechpy.processing 9 | .. currentmodule:: speechpy.processing 10 | 11 | 12 | :hidden:`Global Cepstral Mean and Variance Normalization` 13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autofunction:: speechpy.processing.cmvn 16 | 17 | 18 | :hidden:`Local Cepstral Mean and Variance Normalization over Sliding Window` 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autofunction:: speechpy.processing.cmvnw 22 | -------------------------------------------------------------------------------- /docs/source/content/preprocessing.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | Preprocessing 6 | ============= 7 | 8 | .. automodule:: speechpy.processing 9 | .. currentmodule:: speechpy.processing 10 | 11 | :hidden:`Pre-emphasis` 12 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | .. autofunction:: speechpy.processing.preemphasis 14 | 15 | :hidden:`Stacking` 16 | ~~~~~~~~~~~~~~~~~~ 17 | .. autofunction:: speechpy.processing.stack_frames 18 | 19 | :hidden:`FFT Spectrum` 20 | ~~~~~~~~~~~~~~~~~~~~~~ 21 | .. autofunction:: speechpy.processing.fft_spectrum 22 | 23 | :hidden:`Power Spectrum` 24 | ~~~~~~~~~~~~~~~~~~~~~~~~ 25 | .. autofunction:: speechpy.processing.power_spectrum 26 | 27 | :hidden:`Power Spectrum Log` 28 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 29 | .. 
autofunction:: speechpy.processing.log_power_spectrum 30 | 31 | :hidden:`Derivative Extraction` 32 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 33 | 34 | .. autofunction:: speechpy.processing.derivative_extraction 35 | -------------------------------------------------------------------------------- /docs/source/epilogue/CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | 2 | ======================== 3 | Contributing 4 | ======================== 5 | 6 | When contributing to this repository, you are more than welcome to discuss your feedback with any of the owners of this repository. *For typos, please do not create a pull request. Instead, declare them in issues or email the repository owner*. For technical and conceptual questions please feel free to **directly contact the repository owner**. Before asking general questions related to the concepts and techniques provided in this project, **please make sure to read and understand its associated paper**. 7 | 8 | Please note we have a code of conduct; please follow it in all your interactions with the project. 9 | 10 | ---------------------- 11 | Pull Request Process 12 | ---------------------- 13 | 14 | Please consider the following criteria to help us review your contribution effectively: 15 | 16 | 1. The pull request is mainly expected to be a code script suggestion or improvement. 17 | 2. A pull request related to non-code-script sections is expected to make a significant difference in the documentation. Otherwise, it is expected to be announced in the issues section. 18 | 3. Ensure any install or build dependencies are removed before the end of the layer when doing a 19 | build and creating a pull request. 20 | 4. Add comments with details of changes to the interface; this includes new environment 21 | variables, exposed ports, useful file locations and container parameters. 22 | 5. You may merge the pull request once you have the sign-off of at least one other developer, or, if you 23 | do not have permission to do that, you may request the owner to merge it for you once you believe all checks have passed. 24 | 25 | ---------------------- 26 | Final Note 27 | ---------------------- 28 | 29 | We look forward to your kind feedback. Please help us improve this open source project and make our work better. 30 | For contribution, please create a pull request and we will review it promptly. Once again, we appreciate 31 | your kind feedback and thorough code inspections. 32 | -------------------------------------------------------------------------------- /docs/source/epilogue/finalnote.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | Epilogue 3 | ================== 4 | 5 | ------------- 6 | Disclaimer 7 | ------------- 8 | 9 | Although dramatically changed, some portions of this library are inspired by the `python speech features`_ library. 10 | 11 | .. _python speech features: https://github.com/jameslyons/python_speech_features 12 | 13 | We claim the following advantages for our library: 14 | 15 | 1. More accurate operations have been performed for the mel-frequency calculations. 16 | 2. The package supports different ``Python`` versions. 17 | 3. The features are generated in a more organized way, as cubic features. 18 | 4. The package is well-tested and integrated. 19 | 5. The package is up-to-date and actively developed. 20 | 6. The package has been used for research purposes. 21 | 7. 
Exceptions and extreme cases are handled in this library. 22 | 23 | 24 | ------------- 25 | Contributing 26 | ------------- 27 | 28 | When contributing to this repository, please first discuss the change you wish to make via issue, 29 | email, or any other method with the owners of this repository before making a change. *For typos, please 30 | do not create a pull request. Instead, declare them in issues or email the repository owner*. 31 | 32 | Please note we have a code of conduct; please follow it in all your interactions with the project. 33 | 34 | -------------------------- 35 | Pull Request Process 36 | -------------------------- 37 | 38 | Please consider the following criteria to help us review your contribution effectively: 39 | 40 | 1. The pull request is mainly expected to be a code script suggestion or improvement. 41 | 2. A pull request related to non-code-script sections is expected to make a significant difference in the documentation. Otherwise, it is expected to be announced in the issues section. 42 | 3. Ensure any install or build dependencies are removed before the end of the layer when doing a 43 | build and creating a pull request. 44 | 4. Add comments with details of changes to the interface; this includes new environment 45 | variables, exposed ports, useful file locations and container parameters. 46 | 5. You may merge the pull request once you have the sign-off of at least one other developer, or, if you 47 | do not have permission to do that, you may request the owner to merge it for you once you believe all checks have passed. 48 | 49 | ------------- 50 | Final Note 51 | ------------- 52 | 53 | We look forward to your kind feedback. Please help us improve this open source project and make our work better. 54 | For contribution, please create a pull request and we will review it promptly. Once again, we appreciate 55 | your kind feedback and thorough code inspections. 56 | -------------------------------------------------------------------------------- /docs/source/epilogue/test.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | test 3 | ============ 4 | 5 | ------------- 6 | Test Package 7 | ------------- 8 | Once the package has been installed, a test file can be run directly to show the results. 9 | The test example can be seen in ``example/test_package.py`` as below: 10 | 11 | .. code-block:: python 12 | 13 | import scipy.io.wavfile as wav 14 | import numpy as np 15 | import speechpy 16 | import os 17 | 18 | file_name = os.path.join(os.path.dirname(os.path.abspath(__file__)),'Alesis-Sanctuary-QCard-AcoustcBas-C2.wav') 19 | fs, signal = wav.read(file_name) 20 | signal = signal[:,0] 21 | 22 | # Example of pre-emphasizing.
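# (Pre-emphasis applies the first-order filter y[n] = x[n] - cof * x[n-1],
# boosting the high frequencies that are attenuated in speech production;
# cof is typically chosen in the 0.95-0.98 range.)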
23 | signal_preemphasized = speechpy.processing.preemphasis(signal, cof=0.98) 24 | 25 | # Example of stacking frames 26 | frames = speechpy.processing.stack_frames(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, filter=lambda x: np.ones((x,)), 27 | zero_padding=True) 28 | 29 | # Example of extracting power spectrum 30 | power_spectrum = speechpy.processing.power_spectrum(frames, fft_points=512) 31 | print('power spectrum shape=', power_spectrum.shape) 32 | 33 | ############# Extract MFCC features ############# 34 | mfcc = speechpy.feature.mfcc(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, 35 | num_filters=40, fft_length=512, low_frequency=0, high_frequency=None) 36 | mfcc_cmvn = speechpy.processing.cmvnw(mfcc,win_size=301,variance_normalization=True) 37 | print('mfcc(mean + variance normalized) feature shape=', mfcc_cmvn.shape) 38 | 39 | mfcc_feature_cube = speechpy.feature.extract_derivative_feature(mfcc) 40 | print('mfcc feature cube shape=', mfcc_feature_cube.shape) 41 | 42 | ############# Extract logenergy features ############# 43 | logenergy = speechpy.feature.lmfe(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, 44 | num_filters=40, fft_length=512, low_frequency=0, high_frequency=None) 45 | logenergy_feature_cube = speechpy.feature.extract_derivative_feature(logenergy) 46 | print('logenergy features=', logenergy.shape) 47 | 48 | 49 | 50 | 51 | 52 | 53 | ----------- 54 | Test Local 55 | ----------- 56 | 57 | There is an alternative, local way of testing without the need to install the package. 58 | The local test example can be found in ``example/test_local.py`` as follows: 59 | 60 | .. code-block:: python 61 | 62 | import scipy.io.wavfile as wav 63 | import numpy as np 64 | import os 65 | import sys 66 | lib_path = os.path.abspath(os.path.join('..')) 67 | print(lib_path) 68 | sys.path.append(lib_path) 69 | import speechpy 70 | import os 71 | 72 | file_name = os.path.join(os.path.dirname(os.path.abspath(__file__)),'Alesis-Sanctuary-QCard-AcoustcBas-C2.wav') 73 | fs, signal = wav.read(file_name) 74 | signal = signal[:,0] 75 | 76 | # Example of pre-emphasizing.
77 | signal_preemphasized = speechpy.processing.preemphasis(signal, cof=0.98) 78 | 79 | # Example of stacking frames 80 | frames = speechpy.processing.stack_frames(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, filter=lambda x: np.ones((x,)), 81 | zero_padding=True) 82 | 83 | # Example of extracting power spectrum 84 | power_spectrum = speechpy.processing.power_spectrum(frames, fft_points=512) 85 | print('power spectrum shape=', power_spectrum.shape) 86 | 87 | ############# Extract MFCC features ############# 88 | mfcc = speechpy.feature.mfcc(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, 89 | num_filters=40, fft_length=512, low_frequency=0, high_frequency=None) 90 | mfcc_cmvn = speechpy.processing.cmvnw(mfcc,win_size=301,variance_normalization=True) 91 | print('mfcc(mean + variance normalized) feature shape=', mfcc_cmvn.shape) 92 | 93 | mfcc_feature_cube = speechpy.feature.extract_derivative_feature(mfcc) 94 | print('mfcc feature cube shape=', mfcc_feature_cube.shape) 95 | 96 | ############# Extract logenergy features ############# 97 | logenergy = speechpy.feature.lmfe(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, 98 | num_filters=40, fft_length=512, low_frequency=0, high_frequency=None) 99 | logenergy_feature_cube = speechpy.feature.extract_derivative_feature(logenergy) 100 | print('logenergy features=', logenergy.shape) 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | To extract the features, the signal samples are first stacked into frames. The features are then computed for each frame in the stacked-frames collection. 117 | 118 | ------------- 119 | Dependencies 120 | ------------- 121 | 122 | The two packages ``SciPy`` and ``NumPy`` are the required dependencies, and they will be installed automatically by running the ``setup.py`` file. 123 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. SpeechPy documentation master file, created by 2 | sphinx-quickstart on Wed Nov 22 14:40:49 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | :github_url: https://github.com/astorfi/speechpy 7 | 8 | Welcome to SpeechPy's documentation! 9 | ==================================== 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | :caption: Preface 14 | 15 | intro/introductions 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | :caption: Package Reference 20 | 21 | content/preprocessing 22 | content/features 23 | content/postprocessing 24 | 25 | .. toctree:: 26 | :maxdepth: 2 27 | :caption: Epilogue 28 | 29 | epilogue/test 30 | epilogue/CONTRIBUTING 31 | 32 | 33 | Indices and tables 34 | ================== 35 | 36 | * :ref:`genindex` 37 | * :ref:`modindex` 38 | * :ref:`search` 39 | -------------------------------------------------------------------------------- /docs/source/intro/introductions.rst: -------------------------------------------------------------------------------- 1 | 2 | ============ 3 | Introduction 4 | ============ 5 | 6 | ------------------------- 7 | Foreword 8 | ------------------------- 9 | 10 | The purpose of this project is to provide a package for speech processing and 11 | feature extraction. This library provides the most frequently used speech features, including MFCCs and filterbank energies, along with the log-energy of the filterbanks.
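As a quick illustration, the following is a minimal sketch of basic usage (the 16 kHz synthetic noise signal here merely stands in for a real recording):

.. code-block:: python

    import numpy as np
    import speechpy

    fs = 16000  # assumed sampling frequency for this synthetic example
    signal = np.random.uniform(low=-1.0, high=1.0, size=fs)  # 1 s of noise

    # 13 MFCCs per 20 ms frame with a 10 ms stride.
    mfccs = speechpy.feature.mfcc(signal, sampling_frequency=fs,
                                  frame_length=0.020, frame_stride=0.01)

    # Log Mel-filterbank energies over the same framing.
    log_energies = speechpy.feature.lmfe(signal, sampling_frequency=fs,
                                         frame_length=0.020, frame_stride=0.01)
    print(mfccs.shape, log_energies.shape)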
12 | 13 | 14 | .. image:: ../_static/img/speech.jpg 15 | :height: 200px 16 | :width: 400 px 17 | :scale: 100 % 18 | :alt: alternate text 19 | :align: center 20 | 21 | ------------------------- 22 | Motivation 23 | ------------------------- 24 | 25 | There are different motivations for this open source project. 26 | 27 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 28 | Deep Learning application 29 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 30 | 31 | One of the main reasons for creating this package was to provide the necessary features for deep learning applications such as ASR (Automatic Speech Recognition) or SR (Speaker Recognition). 32 | As a result, most of the necessary features are provided here. 33 | 34 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 35 | Pythonic Packaging 36 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 37 | 38 | Another reason for creating this package was to have a Pythonic environment for 39 | speech recognition and feature extraction, due to the fact that the Python language 40 | is becoming ubiquitous! 41 | 42 | 43 | ------------------------- 44 | How to Install? 45 | ------------------------- 46 | 47 | .. image:: ../_static/img/installation_logo.jpg 48 | :height: 100 px 49 | :width: 200 px 50 | :scale: 80 % 51 | :alt: alternate text 52 | :align: center 53 | 54 | 55 | There are two possible ways to install this package: local installation and PyPI. 56 | 57 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 58 | Local Installation 59 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 60 | 61 | For local installation, the repository must first be cloned:: 62 | 63 | git clone https://github.com/astorfi/speech_feature_extraction.git 64 | 65 | 66 | After cloning the repository, navigate to the repository root directory and then execute:: 67 | 68 | python setup.py develop 69 | 70 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 71 | PyPI 72 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 73 | 74 | The package is available on PyPI. For direct installation, simply execute the following: 75 | 76 | 77 | .. code-block:: shell 78 | 79 | pip install speechpy 80 | 81 | ------------------------- 82 | Citation 83 | ------------------------- 84 | 85 | If you use this package, please cite it as follows: 86 | 87 | .. code:: bash 88 | 89 | @misc{amirsina_torfi_2017_840395, 90 | author = {Amirsina Torfi}, 91 | title = {{SpeechPy: Speech recognition and feature extraction}}, 92 | month = aug, 93 | year = 2017, 94 | doi = {10.5281/zenodo.840395}, 95 | url = {https://doi.org/10.5281/zenodo.840395} 96 | } 97 | -------------------------------------------------------------------------------- /example/Alesis-Sanctuary-QCard-AcoustcBas-C2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/example/Alesis-Sanctuary-QCard-AcoustcBas-C2.wav -------------------------------------------------------------------------------- /example/test_local.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example is provided to test the package locally. 3 | There is no need to install the package using pip. 4 | Only cloning the project repository is required.
5 | """ 6 | 7 | import scipy.io.wavfile as wav 8 | import numpy as np 9 | import os 10 | import sys 11 | lib_path = os.path.abspath(os.path.join('..')) 12 | print(lib_path) 13 | sys.path.append(lib_path) 14 | from speechpy import processing 15 | from speechpy import feature 16 | import os 17 | 18 | file_name = os.path.join(os.path.dirname(os.path.abspath(__file__)),'Alesis-Sanctuary-QCard-AcoustcBas-C2.wav') 19 | fs, signal = wav.read(file_name) 20 | signal = signal[:,0] 21 | 22 | # Pre-emphasizing. 23 | signal_preemphasized = processing.preemphasis(signal, cof=0.98) 24 | 25 | # Staching frames 26 | frames = processing.stack_frames(signal, sampling_frequency=fs, 27 | frame_length=0.020, 28 | frame_stride=0.01, 29 | filter=lambda x: np.ones((x,)), 30 | zero_padding=True) 31 | 32 | # Extracting power spectrum 33 | power_spectrum = processing.power_spectrum(frames, fft_points=512) 34 | print('power spectrum shape=', power_spectrum.shape) 35 | 36 | ############# Extract MFCC features ############# 37 | mfcc = feature.mfcc(signal, sampling_frequency=fs, 38 | frame_length=0.020, frame_stride=0.01, 39 | num_filters=40, fft_length=512, low_frequency=0, 40 | high_frequency=None) 41 | 42 | # Cepstral mean variance normalization. 43 | mfcc_cmvn = processing.cmvn(mfcc,variance_normalization=True) 44 | print('mfcc(mean + variance normalized) feature shape=', mfcc_cmvn.shape) 45 | 46 | # Extracting derivative features 47 | mfcc_feature_cube = feature.extract_derivative_feature(mfcc) 48 | print('mfcc feature cube shape=', mfcc_feature_cube.shape) 49 | 50 | ############# Extract logenergy features ############# 51 | logenergy = feature.lmfe(signal, sampling_frequency=fs, 52 | frame_length=0.020, frame_stride=0.01, 53 | num_filters=40, fft_length=512, 54 | low_frequency=0, high_frequency=None) 55 | logenergy_feature_cube = feature.extract_derivative_feature(logenergy) 56 | print('logenergy features=', logenergy.shape) 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /example/test_package.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example is provided to test the installed package. 3 | The package should be installed from PyPi using pip install speechpy. 4 | """ 5 | 6 | import scipy.io.wavfile as wav 7 | import numpy as np 8 | import speechpy 9 | import os 10 | 11 | # Reading the sample wave file 12 | file_name = os.path.join(os.path.dirname(os.path.abspath(__file__)), 13 | 'Alesis-Sanctuary-QCard-AcoustcBas-C2.wav') 14 | fs, signal = wav.read(file_name) 15 | signal = signal[:,0] 16 | 17 | # Pre-emphasizing. 
18 | signal_preemphasized = speechpy.processing.preemphasis(signal, cof=0.98) 19 | 20 | # Staching frames 21 | frames = speechpy.processing.stack_frames(signal, sampling_frequency=fs, 22 | frame_length=0.020, 23 | frame_stride=0.01, 24 | filter=lambda x: np.ones((x,)), 25 | zero_padding=True) 26 | 27 | # Extracting power spectrum 28 | power_spectrum = speechpy.processing.power_spectrum(frames, fft_points=512) 29 | print('power spectrum shape=', power_spectrum.shape) 30 | 31 | ############# Extract MFCC features ############# 32 | mfcc = speechpy.feature.mfcc(signal, sampling_frequency=fs, 33 | frame_length=0.020, frame_stride=0.01, 34 | num_filters=40, fft_length=512, low_frequency=0, 35 | high_frequency=None) 36 | mfcc_cmvn = speechpy.processing.cmvnw(mfcc,win_size=301, 37 | variance_normalization=True) 38 | print('mfcc(mean + variance normalized) feature shape=', mfcc_cmvn.shape) 39 | 40 | # Extracting derivative features 41 | mfcc_feature_cube = speechpy.feature.extract_derivative_feature(mfcc) 42 | print('mfcc feature cube shape=', mfcc_feature_cube.shape) 43 | 44 | ############# Extract logenergy features ############# 45 | logenergy = speechpy.feature.lmfe(signal, sampling_frequency=fs, 46 | frame_length=0.020, frame_stride=0.01, 47 | num_filters=40, fft_length=512, 48 | low_frequency=0, high_frequency=None) 49 | logenergy_feature_cube = speechpy.feature.extract_derivative_feature(logenergy) 50 | print('logenergy features=', logenergy.shape) 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /paper/paper.bib: -------------------------------------------------------------------------------- 1 | @inproceedings{hirsch2000aurora, 2 | title={The Aurora experimental framework for the performance evaluation of speech recognition systems under noisy conditions}, 3 | author={Hirsch, Hans-G{\"u}nter and Pearce, David}, 4 | booktitle={ASR2000-Automatic Speech Recognition: Challenges for the new Millenium ISCA Tutorial and Research Workshop (ITRW)}, 5 | year={2000} 6 | } 7 | 8 | @book{guyon2008feature, 9 | title={Feature extraction: foundations and applications}, 10 | author={Guyon, Isabelle and Gunn, Steve and Nikravesh, Masoud and Zadeh, Lofti A}, 11 | volume={207}, 12 | year={2008}, 13 | publisher={Springer} 14 | } 15 | 16 | @article{furui1986speaker, 17 | title={Speaker-independent isolated word recognition using dynamic features of speech spectrum}, 18 | author={Furui, Sadaoki}, 19 | journal={IEEE Transactions on Acoustics, Speech, and Signal Processing}, 20 | volume={34}, 21 | number={1}, 22 | pages={52--59}, 23 | year={1986}, 24 | publisher={IEEE} 25 | } 26 | 27 | @book{yu2016automatic, 28 | title={AUTOMATIC SPEECH RECOGNITION.}, 29 | author={Yu, Dong and Deng, Li}, 30 | year={2016}, 31 | publisher={Springer} 32 | } 33 | 34 | @book{rabiner1993fundamentals, 35 | title={Fundamentals of speech recognition}, 36 | author={Rabiner, Lawrence R and Juang, Biing-Hwang}, 37 | volume={14}, 38 | year={1993}, 39 | publisher={PTR Prentice Hall Englewood Cliffs} 40 | } 41 | 42 | @article{campbell1997speaker, 43 | title={Speaker recognition: A tutorial}, 44 | author={Campbell, Joseph P}, 45 | journal={Proceedings of the IEEE}, 46 | volume={85}, 47 | number={9}, 48 | pages={1437--1462}, 49 | year={1997}, 50 | publisher={IEEE} 51 | } 52 | 53 | 54 | @inproceedings{deng2013recent, 55 | title={Recent advances in deep learning for speech research at Microsoft}, 56 | author={Deng, Li and Li, Jinyu and Huang, Jui-Ting and Yao, Kaisheng and Yu, Dong and Seide, 
Frank and Seltzer, Michael and Zweig, Geoff and He, Xiaodong and Williams, Jason and others}, 57 | booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on}, 58 | pages={8604--8608}, 59 | year={2013}, 60 | organization={IEEE} 61 | } 62 | 63 | @inproceedings{lee2009unsupervised, 64 | title={Unsupervised feature learning for audio classification using convolutional deep belief networks}, 65 | author={Lee, Honglak and Pham, Peter and Largman, Yan and Ng, Andrew Y}, 66 | booktitle={Advances in neural information processing systems}, 67 | pages={1096--1104}, 68 | year={2009} 69 | } 70 | 71 | @inproceedings{yu2011improved, 72 | title={Improved bottleneck features using pretrained deep neural networks}, 73 | author={Yu, Dong and Seltzer, Michael L}, 74 | booktitle={Twelfth Annual Conference of the International Speech Communication Association}, 75 | year={2011} 76 | } 77 | 78 | @article{giannakopoulos2015pyaudioanalysis, 79 | title={pyAudioAnalysis: An Open-Source Python Library for Audio Signal Analysis}, 80 | author={Giannakopoulos, Theodoros}, 81 | journal={PloS one}, 82 | volume={10}, 83 | number={12}, 84 | year={2015}, 85 | publisher={Public Library of Science} 86 | } 87 | 88 | @article{torfi2017text, 89 | title={Text-independent speaker verification using 3d convolutional neural networks}, 90 | author={Torfi, Amirsina and Nasrabadi, Nasser M and Dawson, Jeremy}, 91 | journal={arXiv preprint arXiv:1705.09422}, 92 | year={2017} 93 | } 94 | 95 | @article{torfi20173d, 96 | title={3D Convolutional Neural Networks for Cross Audio-Visual Matching Recognition}, 97 | author={Torfi, Amirsina and Iranmanesh, Seyed Mehdi and Nasrabadi, Nasser and Dawson, Jeremy}, 98 | journal={IEEE Access}, 99 | volume={5}, 100 | pages={22081--22091}, 101 | year={2017}, 102 | publisher={IEEE} 103 | } 104 | 105 | @article{prechelt2000empirical, 106 | title={An empirical comparison of c, c++, java, perl, python, rexx and tcl}, 107 | author={Prechelt, Lutz}, 108 | journal={IEEE Computer}, 109 | volume={33}, 110 | number={10}, 111 | pages={23--29}, 112 | year={2000} 113 | } 114 | 115 | @misc{torfispeechpy, 116 | author = {Amirsina Torfi}, 117 | title = {{SpeechPy: Speech recognition and feature extraction}}, 118 | month = aug, 119 | year = 2017, 120 | doi = {10.5281/zenodo.810391}, 121 | url = {https://doi.org/10.5281/zenodo.810391}} 122 | 123 | @article{torfi2017coupled, 124 | title={Coupled 3D Convolutional Neural Networks for Audio-Visual Recognition}, 125 | author={Torfi, Amirsina and Iranmanesh, Seyed Mehdi and Nasrabadi, Nasser M and Dawson, Jeremy}, 126 | journal={arXiv preprint arXiv:1706.05739}, 127 | year={2017} 128 | } 129 | 130 | @article{torfi2017construction, 131 | title={On the Construction of Polar Codes for Achieving the Capacity of Marginal Channels}, 132 | author={Torfi, Amisina and Soleymani, Sobhan and Vakili, Vahid Tabataba}, 133 | journal={arXiv preprint arXiv:1707.04512}, 134 | year={2017} 135 | } 136 | 137 | @article{shannon2001mathematical, 138 | title={A mathematical theory of communication}, 139 | author={Shannon, Claude Elwood}, 140 | journal={ACM SIGMOBILE Mobile Computing and Communications Review}, 141 | volume={5}, 142 | number={1}, 143 | pages={3--55}, 144 | year={2001}, 145 | publisher={ACM} 146 | } 147 | 148 | @article{gurban2009information, 149 | title={Information theoretic feature extraction for audio-visual speech recognition}, 150 | author={Gurban, Mihai and Thiran, Jean-Philippe}, 151 | journal={IEEE Transactions on signal processing}, 152 
| volume={57}, 153 | number={12}, 154 | pages={4765--4776}, 155 | year={2009}, 156 | publisher={IEEE} 157 | } 158 | 159 | @inproceedings{variani2014deep, 160 | title={Deep neural networks for small footprint text-dependent speaker verification}, 161 | author={Variani, Ehsan and Lei, Xin and McDermott, Erik and Moreno, Ignacio Lopez and Gonzalez-Dominguez, Javier}, 162 | booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2014 IEEE International Conference on}, 163 | pages={4052--4056}, 164 | year={2014}, 165 | organization={IEEE} 166 | } 167 | 168 | @article{hinton2012deep, 169 | title={Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups}, 170 | author={Hinton, Geoffrey and Deng, Li and Yu, Dong and Dahl, George E and Mohamed, Abdel-rahman and Jaitly, Navdeep and Senior, Andrew and Vanhoucke, Vincent and Nguyen, Patrick and Sainath, Tara N and others}, 171 | journal={IEEE Signal Processing Magazine}, 172 | volume={29}, 173 | number={6}, 174 | pages={82--97}, 175 | year={2012}, 176 | publisher={IEEE} 177 | } 178 | 179 | @article{lecun2015deep, 180 | title={Deep learning}, 181 | author={LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey}, 182 | journal={nature}, 183 | volume={521}, 184 | number={7553}, 185 | pages={436}, 186 | year={2015}, 187 | publisher={Nature Publishing Group} 188 | } 189 | 190 | @article{liu2015deep, 191 | title={Deep feature for text-dependent speaker verification}, 192 | author={Liu, Yuan and Qian, Yanmin and Chen, Nanxin and Fu, Tianfan and Zhang, Ya and Yu, Kai}, 193 | journal={Speech Communication}, 194 | volume={73}, 195 | pages={1--13}, 196 | year={2015}, 197 | publisher={Elsevier} 198 | } 199 | 200 | @article{torfi2018attention, 201 | title={Attention-Based Guided Structured Sparsity of Deep Neural Networks}, 202 | author={Torfi, Amirsina and Shirvani, Rouzbeh A}, 203 | journal={arXiv preprint arXiv:1802.09902}, 204 | year={2018} 205 | } 206 | 207 | 208 | -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'SpeechPy - A Library for Speech Processing and Recognition' 3 | tags: 4 | - Python 5 | authors: 6 | - name: Amirsina Torfi 7 | orcid: 0000-0003-2282-4361 8 | affiliation: "1" 9 | affiliations: 10 | - name: Virginia Tech, Department of Computer Science 11 | index: 1 12 | date: 15 May 2018 13 | bibliography: paper.bib 14 | --- 15 | 16 | # Abstract 17 | SpeechPy is an open source Python package that contains speech preprocessing techniques, speech features, and important post-processing operations. It provides most frequent used speech features including MFCCs and filterbank energies alongside with the log-energy of filter-banks. The aim of the package is to provide researchers with a simple tool for speech feature extraction and processing purposes in applications such as Automatic Speech Recognition and Speaker Verification. 18 | 19 | # Introduction 20 | Automatic Speech Recognition (ASR) requires three main components for 21 | further analysis: Preprocessing, feature extraction, and 22 | post-processing. Feature extraction, in an abstract meaning, is 23 | extracting descriptive features from raw signal for speech 24 | classification purposes. Due to the high 25 | dimensionality, the raw signal can be less informative compared to 26 | extracted higher level features. 
Feature extraction comes to our rescue 27 | for turning the high-dimensional signal into a lower-dimensional and yet 28 | more informative version of it for sound recognition and 29 | classification [@furui1986speaker; @guyon2008feature; @hirsch2000aurora]. 30 | 31 | ![Scheme of speech recognition](test/_imgs/Scheme_of_speech_recognition_system.png) 32 | 33 | Feature extraction, in essence, should be done considering the specific 34 | application at hand. For example, in ASR applications, the linguistic 35 | characteristics of the raw signal are of great importance and the other 36 | characteristics must be 37 | ignored [@yu2016automatic; @rabiner1993fundamentals]. On the other hand, 38 | in the Speaker Recognition (SR) task, solely voice-associated information 39 | must be contained in the extracted feature [@campbell1997speaker]. So the 40 | feature extraction goal is to extract the relevant feature from the raw 41 | signal and map it to a lower dimensional feature space. The problem of 42 | feature extraction has been investigated in pattern classification aimed 43 | at preventing the curse of dimensionality. There are some feature 44 | extraction approaches based on information theory 45 | [@torfi2017construction; @shannon2001mathematical] applied to multimodal 46 | signals that demonstrated promising results [@gurban2009information]. 47 | 48 | The speech features can be categorized into two general types of 49 | acoustic and linguistic features. The former is mainly related to 50 | non-verbal sounds and the latter is associated with ASR and SR 51 | systems for which the verbal part has the major role. Perhaps one of the most 52 | famous linguistic features, and one that is hard to beat, is the Mel-Frequency 53 | Cepstral Coefficients (MFCC). It uses raw speech frames in the range 54 | from 20ms to 40ms, over which the signal has stationary 55 | characteristics [@rabiner1993fundamentals]. MFCC is widely used for both 56 | ASR and SR tasks and more recently in the associated deep learning 57 | applications as the input to the network rather than directly feeding 58 | the signal [@deng2013recent; @lee2009unsupervised; @yu2011improved]. 59 | With the advent of deep learning [@lecun2015deep; @torfi2018attention], 60 | major improvements have been achieved by using deep neural networks 61 | rather than traditional methods for speech recognition 62 | applications [@variani2014deep; @hinton2012deep; @liu2015deep]. 63 | 64 | Although free software for speech recognition such as 65 | VOICEBOX is available, most of these packages are Matlab-based, which limits 66 | their reproducibility due to commercial issues. Another great package is 67 | PyAudioAnalysis [@giannakopoulos2015pyaudioanalysis], which is a 68 | comprehensive package developed in Python. However, the issue with 69 | PyAudioAnalysis is its complexity and verbosity when 70 | extracting simple features, and its current version also lacks some important 71 | preprocessing and post-processing operations. 72 | 73 | Considering the recent advent of deep learning in ASR and SR and the 74 | importance of accurate speech feature extraction, here are the 75 | motivations behind the SpeechPy package: 76 | 77 | * Developing a free open source package which covers important 78 | preprocessing techniques, speech features, and post-processing 79 | operations required for ASR and SR applications. 80 | 81 | * A simple package with a minimum degree of complexity should be 82 | available for beginners.
83 | 84 | * A well-tested and continuously integrated package for future 85 | developments should be developed. 86 | 87 | SpeechPy has been developed to satisfy the aforementioned needs. It 88 | contains the most important preprocessing and post-processing operations 89 | and a selection of frequently used speech features. The package is free 90 | and released as open source software. Continuous integration, 91 | for instant error checking and validation of changes, has been deployed 92 | for SpeechPy. Moreover, prior to the latest official release of 93 | SpeechPy, the package has successfully been utilized for research 94 | purposes [@torfi20173d; @torfi2017text]. 95 | 96 | # Package Eco-system 97 | 98 | 99 | SpeechPy has been developed using the Python language for its interface and 100 | backend as well. An empirical study demonstrated that Python, as a 101 | scripting language, is more effective and productive than conventional 102 | languages for some programming problems, and memory consumption is 103 | often “better than Java and not much worse than C or 104 | C++” [@prechelt2000empirical]. We chose Python due to its simplicity and 105 | popularity. Third-party libraries are avoided except *Numpy* and *Scipy* 106 | for handling data and numeric computations. 107 | 108 | ## Complexity 109 | 110 | As the user should not and does not even need to manipulate the internal 111 | package structure, object-oriented programming is mostly used for 112 | package development, which provides an easier interface for the user at a 113 | sacrifice to the simplicity of the code. However, the internal code 114 | complexity of the package does not affect the user experience since the 115 | modules can easily be called with the associated arguments. SpeechPy is 116 | a library with a collection of sub-modules. 117 | 118 | ## Code Style and Documentation 119 | 120 | SpeechPy is constructed based on the PEP 8 style guide for Python code. 121 | Moreover, it is extensively documented using formatted docstrings 122 | and Sphinx, allowing automatic regeneration of the documentation in 123 | case of changes to internal modules. The full documentation of the project 124 | is generated in HTML and PDF format using Sphinx and is hosted 125 | online. The official releases of the project are hosted on Zenodo as 126 | well [@torfispeechpy]. 127 | 128 | ![A general view of the package](test/_imgs/packageview.png) 129 | 130 | ## Continuous Testing and Extensibility 131 | 132 | The output of each function has been evaluated using different 133 | tests and compared against other existing standard packages. For continuous 134 | testing, the code is hosted on GitHub and integrated with Travis CI. 135 | Each modification to the code must pass the unit tests defined for the 136 | continuous integration. This will ensure the package does not break with 137 | unadapted code scripts. However, the validity of the modifications 138 | should always be investigated with the owner or authorized collaborators 139 | of the project. The code is tested at each modification for 140 | Python versions *“2.7”*, *“3.4”* and *“3.5”*. In the future, these 141 | versions are subject to change. 142 | 143 | ![Travis CI web interface after testing SpeechPy against a new change](test/_imgs/travicCI.png) 144 | 145 | # Availability 146 | 147 | ## Operating system {#operating-system .unnumbered} 148 | 149 | Tested on Ubuntu 14.04 and 16.04 LTS Linux, Apple Mac OS X 10.9.5, and 150 | Microsoft Windows 7 & 10.
We expect that SpeechPy works on any 151 | distribution as long as Python and the package dependencies are 152 | installed. 153 | 154 | ## Programming language {#programming-language .unnumbered} 155 | 156 | The package has been tested with Python 2.7, 3.4 and 3.5. However, using 157 | Python 3.5 is suggested. 158 | 159 | ## Additional system requirements & dependencies {#additional-system-requirements-dependencies .unnumbered} 160 | 161 | SpeechPy is a light package and small computational power would be 162 | enough for running it. Although the speed of the execution is totally 163 | dependent on the system architecture. The dependencies are as follows: 164 | 165 | * Numpy 166 | 167 | * SciPy 168 | 169 | # Acknowledgement 170 | 171 | This work has been completed with computational resources provided by the West Virginia University and Virginia Tech and is based upon a work 172 | supported by the Center for Identification Technology Research (CITeR) and the National Science Foundation (NSF) under Grant \#1650474. 173 | I would like to thank professor Nasser Nasrabadi for supporting me through this project and for his valuable supervision regarding my research in speech technology. 174 | 175 | # References 176 | -------------------------------------------------------------------------------- /paper/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/paper/paper.pdf -------------------------------------------------------------------------------- /paper/test/_imgs/Scheme_of_speech_recognition_system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/paper/test/_imgs/Scheme_of_speech_recognition_system.png -------------------------------------------------------------------------------- /paper/test/_imgs/packageview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/paper/test/_imgs/packageview.png -------------------------------------------------------------------------------- /paper/test/_imgs/travicCI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/paper/test/_imgs/travicCI.png -------------------------------------------------------------------------------- /paper/test/test.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'SpeechPy - A Library for Speech Processing and Recognition' 3 | tags: 4 | - Python 5 | authors: 6 | - name: Amirsina Torfi 7 | orcid: 0000-0003-2282-4361 8 | affiliation: "1" 9 | affiliations: 10 | - name: Virginia Tech, Department of Computer Science 11 | index: 1 12 | date: 15 May 2018 13 | bibliography: paper.bib 14 | --- 15 | 16 | # Abstract 17 | SpeechPy is an open source Python package that contains speech preprocessing techniques, speech features, and important post-processing operations. It provides most frequent used speech features including MFCCs and filterbank energies alongside with the log-energy of filter-banks. 
The aim of the package is to provide researchers with a simple tool for speech feature extraction and processing purposes in applications such as Automatic Speech Recognition and Speaker Verification. 18 | 19 | # Introduction 20 | Automatic Speech Recognition (ASR) requires three main components for 21 | further analysis: Preprocessing, feature extraction, and 22 | post-processing. Feature extraction, in an abstract meaning, is 23 | extracting descriptive features from raw signal for speech 24 | classification purposes. Due to the high 25 | dimensionality, the raw signal can be less informative compared to 26 | extracted higher level features. Feature extraction comes to our rescue 27 | for turning the high dimensional signal to a lower dimensional and yet 28 | more informative version of that for sound recognition and 29 | classification [@furui1986speaker; @guyon2008feature; @hirsch2000aurora]. 30 | 31 | ![Scheme of speech recognition](_imgs/Scheme_of_speech_recognition_system.png) 32 | 33 | Feature extraction, in essence, should be done considering the specific 34 | application at hand. For example, in ASR applications, the linguistic 35 | characteristics of the raw signal are of great importance and the other 36 | characteristics must be 37 | ignored [@yu2016automatic; @rabiner1993fundamentals]. On the other hand, 38 | in Speaker Recognition (SR) task, solely voice-associated information 39 | must be contained in extracted feature [@campbell1997speaker]. So the 40 | feature extraction goal is to extract the relevant feature from the raw 41 | signal and map it to a lower dimensional feature space. The problem of 42 | feature extraction has been investigated in pattern classification aimed 43 | at preventing the curse of dimensionality. There are some feature 44 | extraction approaches based on information theory 45 | [@torfi2017construction; @shannon2001mathematical] applied to multimodal 46 | signals and demonstrated promising results [@gurban2009information]. 47 | 48 | The speech features can be categorized into two general types of 49 | acoustic and linguistic features. The former one is mainly related to 50 | non-verbal sounds and the later one is associated with ASR and SR 51 | systems for which verbal part has the major role. Perhaps one the most 52 | famous linguistic feature which is hard to beat is the Mel-Frequency 53 | Cepstral Coefficients (MFCC). It uses speech raw frames in the range 54 | from 20ms to 40ms for having stationary 55 | characteristics [@rabiner1993fundamentals]. MFCC is widely used for both 56 | ASR and SR tasks and more recently in the associated deep learning 57 | applications as the input to the network rather than directly feeding 58 | the signal [@deng2013recent; @lee2009unsupervised; @yu2011improved]. 59 | With the advent of deep learning [@lecun2015deep; @torfi2018attention], 60 | major improvements have been achieved by using deep neural networks 61 | rather than traditional methods for speech recognition 62 | applications [@variani2014deep; @hinton2012deep; @liu2015deep]. 63 | 64 | With the availability of free software for speech recognition such as 65 | VOICEBOX[^1], most of these softwares are Matlab-based which limits 66 | their reproducibility due to commercial issues. Another great package is 67 | PyAudioAnalysis [@giannakopoulos2015pyaudioanalysis], which is a 68 | comprehensive package developed in Python. 
However, the issue with 69 | PyAudioAnalysis is that its complexity and being too verbose for 70 | extracting simple features and it also lacks some important 71 | preprocessing and post-processing operations for its current version. 72 | 73 | Considering the recent advent of deep learning in ASR and SR and the 74 | importance of the accurate speech feature extraction, here are the 75 | motivations behind SpeechPy package: 76 | 77 | * Developing a free open source package which covers important 78 | preprocessing techniques, speech features, and post-processing 79 | operations required for ASR and SR applications. 80 | 81 | * A simple package with a minimum degree of complexity should be 82 | available for beginners. 83 | 84 | * A well-tested and continuously integrated package for future 85 | developments should be developed. 86 | 87 | SpeechPy has been developed to satisfy the aforementioned needs. It 88 | contains the most important preprocessing and post-processing operations 89 | and a selection of frequently used speech features. The package is free 90 | and released as an open source software[^2]. Continuous integration 91 | using for instant error check and validity of changes has been deployed 92 | for SpeechPy. Moreover, prior to the latest official release of 93 | SpeechPy, the package has successfully been utilized for research 94 | purposes [@torfi20173d; @torfi2017text]. 95 | 96 | # Package Eco-system 97 | 98 | 99 | SpeechPy has been developed using Python language for its interface and 100 | backed as well. An empirical study demonstrated that Python as a 101 | scripting language, is more effective and productive than conventional 102 | languages[^3] for some programming problems and memory consumption is 103 | often “better than Java and not much worse than C or 104 | C++” [@prechelt2000empirical]. We chose Python due to its simplicity and 105 | popularity. Third-party libraries are avoided except *Numpy* and *Scipy* 106 | for handling data and numeric computations. 107 | 108 | ## Complexity 109 | 110 | As the user should not and does not even need to manipulate the internal 111 | package structure, object-oriented programming is mostly used for 112 | package development which provides easier interface for the user with a 113 | sacrifice to the simplicity of the code. However, the internal code 114 | complexity of the package does not affect the user experience since the 115 | modules can easily be called with the associated arguments. SpeechPy is 116 | a library with a collection of sub-modules. 117 | 118 | ## Code Style and Documentation 119 | 120 | SpeechPy is constructed based on PEP 8 style guide for Python codes. 121 | Moreover, it is extensively documented using the formatted docstrings 122 | and Sphinx[^4] for further automatic modifications to the document in 123 | case of changing internal modules. The full documentation of the project 124 | will be generated in HTML and PDF format using Sphinx and is hosted 125 | online. The official releases of the project are hosted on the Zenodo as 126 | well[^5] [@torfispeechpy]. 127 | 128 | ![A general view of the package](_imgs/packageview.png) 129 | 130 | ## Continuous Testing and Extensibility 131 | 132 | The output of each function has been evaluated as well using different 133 | tests as opposed to the other existing standard packages. For continuous 134 | testing, the code is hosted on GitHub and integrated with Travis CI. 
135 | Each modification to the code must pass the unit tests defined for the 136 | continuous integration. This will ensure the package does not break with 137 | unadapted code scripts. However, the validity of the modifications 138 | should always be investigated with the owner or authorized collaborators 139 | of the project. The code is tested at each modification for 140 | Python versions *“2.7”*, *“3.4”* and *“3.5”*. In the future, these 141 | versions are subject to change. 142 | 143 | ![Travis CI web interface after testing SpeechPy against a new change](_imgs/travicCI.png) 144 | 145 | # Availability 146 | 147 | ## Operating system {#operating-system .unnumbered} 148 | 149 | Tested on Ubuntu 14.04 and 16.04 LTS Linux, Apple Mac OS X 10.9.5, and 150 | Microsoft Windows 7 & 10. We expect that SpeechPy works on any 151 | distribution as long as Python and the package dependencies are 152 | installed. 153 | 154 | ## Programming language {#programming-language .unnumbered} 155 | 156 | The package has been tested with Python 2.7, 3.4 and 3.5. However, using 157 | Python 3.5 is suggested. 158 | 159 | ## Additional system requirements & dependencies {#additional-system-requirements-dependencies .unnumbered} 160 | 161 | SpeechPy is a light package, and small computational power is 162 | enough for running it, although the speed of execution is totally 163 | dependent on the system architecture. The dependencies are as follows: 164 | 165 | * Numpy 166 | 167 | * SciPy 168 | 169 | # Acknowledgement 170 | 171 | This work has been completed in part with computational resources 172 | provided by West Virginia University and is based upon work 173 | supported by the Center for Identification Technology Research (CITeR) 174 | and the National Science Foundation (NSF) under Grant \#1650474. 175 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | speechpy 4 | python-coveralls 5 | pytest 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.rst 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='speechpy', 4 | version='2.4', 5 | description='The Python package for extracting speech features.', 6 | author='Amirsina Torfi', 7 | author_email='amirsina.torfi@gmail.com', 8 | url='https://github.com/astorfi/speechpy', 9 | download_url = 'https://github.com/astorfi/speechpy/archive/2.4.zip', 10 | packages=find_packages(exclude=('tests', 'docs')), 11 | include_package_data=True, 12 | install_requires=[ 13 | 'scipy', 14 | 'numpy', 15 | ], 16 | zip_safe=False) 17 | -------------------------------------------------------------------------------- /speechpy/__init__.py: -------------------------------------------------------------------------------- 1 | from . import feature 2 | from . import processing 3 | -------------------------------------------------------------------------------- /speechpy/feature.py: -------------------------------------------------------------------------------- 1 | """feature module.
2 | 3 | This module provides functions for calculating the main speech 4 | features that the package aims to extract, as well as the required 5 | elements. 6 | 7 | 8 | Functions: 9 | 10 | filterbanks: Compute the Mel-filterbanks 11 | The filterbanks must be created for extracting 12 | speech features such as MFCC. 13 | 14 | mfcc: Extracting Mel Frequency Cepstral Coefficient feature. 15 | 16 | mfe: Extracting Mel Energy feature. 17 | 18 | lmfe: Extracting Log Mel Energy feature. 19 | 20 | extract_derivative_feature: Extract the first and second derivative 21 | features. This function directly uses the ``derivative_extraction`` 22 | function in the ``processing`` module. 23 | 24 | """ 25 | 26 | from __future__ import division 27 | import numpy as np 28 | from . import processing 29 | from scipy.fftpack import dct 30 | from . import functions 31 | 32 | 33 | def filterbanks( 34 | num_filter, 35 | coefficients, 36 | sampling_freq, 37 | low_freq=None, 38 | high_freq=None): 39 | """Compute the Mel-filterbanks. Each filter will be stored in one row. 40 | The columns correspond to fft bins. 41 | 42 | Args: 43 | num_filter (int): the number of filters in the filterbank, default 20. 44 | coefficients (int): (fftpoints//2 + 1). Default is 257. 45 | sampling_freq (float): the samplerate of the signal we are working 46 | with. It affects mel spacing. 47 | low_freq (float): lowest band edge of mel filters, default 300 Hz 48 | high_freq (float): highest band edge of mel filters, 49 | default samplerate/2 50 | 51 | Returns: 52 | array: A numpy array of size num_filter x (fftpoints//2 + 1) 53 | which contains the filterbanks 54 | """ 55 | high_freq = high_freq or sampling_freq / 2 56 | low_freq = low_freq or 300 57 | s = "High frequency cannot be greater than half of the sampling frequency!" 58 | assert high_freq <= sampling_freq / 2, s 59 | assert low_freq >= 0, "low frequency cannot be less than zero!" 60 | 61 | # Computing the Mel filterbank 62 | # converting the upper and lower frequencies to Mels. 63 | # num_filter + 2 is because for num_filter filterbanks we need 64 | # num_filter + 2 points. 65 | mels = np.linspace( 66 | functions.frequency_to_mel(low_freq), 67 | functions.frequency_to_mel(high_freq), 68 | num_filter + 2) 69 | 70 | # we should convert Mels back to Hertz because the start and end-points 71 | # should be at the desired frequencies. 72 | hertz = functions.mel_to_frequency(mels) 73 | 74 | # The frequency resolution required to put filters at the 75 | # exact points calculated above should be obtained. 76 | # So we should round those frequencies to the closest FFT bin. 77 | freq_index = ( 78 | np.floor( 79 | (coefficients + 80 | 1) * 81 | hertz / 82 | sampling_freq)).astype(int) 83 | 84 | # Initial definition 85 | filterbank = np.zeros([num_filter, coefficients]) 86 | 87 | # The triangular function for each filter 88 | for i in range(0, num_filter): 89 | left = int(freq_index[i]) 90 | middle = int(freq_index[i + 1]) 91 | right = int(freq_index[i + 2]) 92 | z = np.linspace(left, right, num=right - left + 1) 93 | filterbank[i, 94 | left:right + 1] = functions.triangle(z, 95 | left=left, 96 | middle=middle, 97 | right=right) 98 | 99 | return filterbank 100 | 101 | 102 | def mfcc( 103 | signal, 104 | sampling_frequency, 105 | frame_length=0.020, 106 | frame_stride=0.01, 107 | num_cepstral=13, 108 | num_filters=40, 109 | fft_length=512, 110 | low_frequency=0, 111 | high_frequency=None, 112 | dc_elimination=True): 113 | """Compute MFCC features from an audio signal.
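    The computation follows the standard MFCC pipeline: frame the
    signal, compute the power spectrum of each frame, apply the Mel
    filterbank, take the logarithm, and apply a DCT, keeping the first
    ``num_cepstral`` coefficients.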
114 | 115 | Args: 116 | 117 | signal (array): the audio signal from which to compute features. 118 | Should be an N x 1 array 119 | sampling_frequency (int): the sampling frequency of the signal 120 | we are working with. 121 | frame_length (float): the length of each frame in seconds. 122 | Default is 0.020s 123 | frame_stride (float): the step between successive frames in seconds. 124 | Default is 0.01s (a stride equal to the frame length means no overlap) 125 | num_filters (int): the number of filters in the filterbank, 126 | default 40. 127 | fft_length (int): number of FFT points. Default is 512. 128 | low_frequency (float): lowest band edge of mel filters. 129 | In Hz, default is 0. 130 | high_frequency (float): highest band edge of mel filters. 131 | In Hz, default is samplerate/2 132 | num_cepstral (int): Number of cepstral coefficients. 133 | dc_elimination (bool): If the first DC component should 134 | be eliminated or not. 135 | 136 | Returns: 137 | array: A numpy array of size (num_frames x num_cepstral) containing mfcc features. 138 | """ 139 | feature, energy = mfe(signal, sampling_frequency=sampling_frequency, 140 | frame_length=frame_length, frame_stride=frame_stride, 141 | num_filters=num_filters, fft_length=fft_length, 142 | low_frequency=low_frequency, 143 | high_frequency=high_frequency) 144 | if len(feature) == 0: 145 | return np.empty((0, num_cepstral)) 146 | feature = np.log(feature) 147 | feature = dct(feature, type=2, axis=-1, norm='ortho')[:, :num_cepstral] 148 | 149 | # replace first cepstral coefficient with log of frame energy for DC 150 | # elimination. 151 | if dc_elimination: 152 | feature[:, 0] = np.log(energy) 153 | return feature 154 | 155 | 156 | def mfe(signal, sampling_frequency, frame_length=0.020, frame_stride=0.01, 157 | num_filters=40, fft_length=512, low_frequency=0, high_frequency=None): 158 | """Compute Mel-filterbank energy features from an audio signal. 159 | 160 | Args: 161 | signal (array): the audio signal from which to compute features. 162 | Should be an N x 1 array 163 | sampling_frequency (int): the sampling frequency of the signal 164 | we are working with. 165 | frame_length (float): the length of each frame in seconds. 166 | Default is 0.020s 167 | frame_stride (float): the step between successive frames in seconds. 168 | Default is 0.01s (a stride equal to the frame length means no overlap) 169 | num_filters (int): the number of filters in the filterbank, 170 | default 40. 171 | fft_length (int): number of FFT points. Default is 512. 172 | low_frequency (float): lowest band edge of mel filters. 173 | In Hz, default is 0. 174 | high_frequency (float): highest band edge of mel filters. 175 | In Hz, default is samplerate/2 176 | 177 | Returns: 178 | array: features - the energy of the filterbank of size num_frames x num_filters, and the energy of each frame: num_frames x 1 179 | """ 180 | 181 | # Convert to float 182 | signal = signal.astype(float) 183 | 184 | # Stack frames 185 | frames = processing.stack_frames( 186 | signal, 187 | sampling_frequency=sampling_frequency, 188 | frame_length=frame_length, 189 | frame_stride=frame_stride, 190 | filter=lambda x: np.ones( 191 | (x, 192 | )), 193 | zero_padding=False) 194 | 195 | # getting the high frequency 196 | high_frequency = high_frequency or sampling_frequency / 2 197 | 198 | # calculation of the power spectrum 199 | power_spectrum = processing.power_spectrum(frames, fft_length) 200 | coefficients = power_spectrum.shape[1] 201 | # this stores the total energy in each frame 202 | frame_energies = np.sum(power_spectrum, 1) 203 | 204 | # Handling zero energies.
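    # (np.log is later applied to these energies, so exact zeros are
    # replaced with a tiny machine epsilon to avoid -inf values.)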
156 | def mfe(signal, sampling_frequency, frame_length=0.020, frame_stride=0.01,
157 |         num_filters=40, fft_length=512, low_frequency=0, high_frequency=None):
158 |     """Compute Mel-filterbank energy features from an audio signal.
159 | 
160 |     Args:
161 |         signal (array): the audio signal from which to compute features.
162 |             Should be an N x 1 array.
163 |         sampling_frequency (int): the sampling frequency of the signal
164 |             we are working with.
165 |         frame_length (float): the length of each frame in seconds.
166 |             Default is 0.020s.
167 |         frame_stride (float): the step between successive frames in seconds.
168 |             Default is 0.01s (which means 50% overlap for 0.020s frames).
169 |         num_filters (int): the number of filters in the filterbank,
170 |             default 40.
171 |         fft_length (int): number of FFT points. Default is 512.
172 |         low_frequency (float): lowest band edge of mel filters.
173 |             In Hz, default is 0.
174 |         high_frequency (float): highest band edge of mel filters.
175 |             In Hz, default is samplerate/2.
176 | 
177 |     Returns:
178 |         array: features - the filterbank energies of size num_frames x num_filters, and the energy of each frame of size num_frames x 1.
179 |     """
180 | 
181 |     # Convert to float
182 |     signal = signal.astype(float)
183 | 
184 |     # Stack frames
185 |     frames = processing.stack_frames(
186 |         signal,
187 |         sampling_frequency=sampling_frequency,
188 |         frame_length=frame_length,
189 |         frame_stride=frame_stride,
190 |         filter=lambda x: np.ones(
191 |             (x,)
192 |         ),
193 |         zero_padding=False)
194 | 
195 |     # Getting the high frequency
196 |     high_frequency = high_frequency or sampling_frequency / 2
197 | 
198 |     # Calculation of the power spectrum
199 |     power_spectrum = processing.power_spectrum(frames, fft_length)
200 |     coefficients = power_spectrum.shape[1]
201 |     # This stores the total energy in each frame
202 |     frame_energies = np.sum(power_spectrum, 1)
203 | 
204 |     # Handling zero energies.
205 |     frame_energies = functions.zero_handling(frame_energies)
206 | 
207 |     # Extracting the filterbank
208 |     filter_banks = filterbanks(
209 |         num_filters,
210 |         coefficients,
211 |         sampling_frequency,
212 |         low_frequency,
213 |         high_frequency)
214 | 
215 |     # Filterbank energies
216 |     features = np.dot(power_spectrum, filter_banks.T)
217 |     features = functions.zero_handling(features)
218 | 
219 |     return features, frame_energies
220 | 
221 | 
222 | def lmfe(signal, sampling_frequency, frame_length=0.020, frame_stride=0.01,
223 |          num_filters=40, fft_length=512, low_frequency=0, high_frequency=None):
224 |     """Compute log Mel-filterbank energy features from an audio signal.
225 | 
226 | 
227 |     Args:
228 |         signal (array): the audio signal from which to compute features.
229 |             Should be an N x 1 array.
230 |         sampling_frequency (int): the sampling frequency of the signal
231 |             we are working with.
232 |         frame_length (float): the length of each frame in seconds.
233 |             Default is 0.020s.
234 |         frame_stride (float): the step between successive frames in seconds.
235 |             Default is 0.01s (which means 50% overlap for 0.020s frames).
236 |         num_filters (int): the number of filters in the filterbank,
237 |             default 40.
238 |         fft_length (int): number of FFT points. Default is 512.
239 |         low_frequency (float): lowest band edge of mel filters.
240 |             In Hz, default is 0.
241 |         high_frequency (float): highest band edge of mel filters.
242 |             In Hz, default is samplerate/2.
243 | 
244 |     Returns:
245 |         array: features - the log filterbank energies of size num_frames x num_filters.
246 |     """
247 | 
248 |     feature, frame_energies = mfe(signal,
249 |                                   sampling_frequency=sampling_frequency,
250 |                                   frame_length=frame_length,
251 |                                   frame_stride=frame_stride,
252 |                                   num_filters=num_filters,
253 |                                   fft_length=fft_length,
254 |                                   low_frequency=low_frequency,
255 |                                   high_frequency=high_frequency)
256 |     feature = np.log(feature)
257 | 
258 |     return feature
259 | 
260 | 
261 | def extract_derivative_feature(feature):
262 |     """
263 |     This function extracts the temporal derivative features, i.e., the
264 |     first and second derivatives.
265 | 
266 |     Args:
267 |         feature (array): The feature vector, whose size is N x M.
268 | 
269 |     Return:
270 |         array: The feature cube, which contains the static, first and second derivative features, of size N x M x 3.
271 |     """
272 |     first_derivative_feature = processing.derivative_extraction(
273 |         feature, DeltaWindows=2)
274 |     second_derivative_feature = processing.derivative_extraction(
275 |         first_derivative_feature, DeltaWindows=2)
276 | 
277 |     # Creating the feature cube for each file
278 |     feature_cube = np.concatenate(
279 |         (feature[:, :, None], first_derivative_feature[:, :, None],
280 |          second_derivative_feature[:, :, None]),
281 |         axis=2)
282 |     return feature_cube
283 | 
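To append dynamic information, the MFCC matrix can be passed through extract_derivative_feature; a minimal sketch (the signal setup is assumed as in the mfcc example above):

import numpy as np
from speechpy import feature

fs = 16000
signal = np.random.normal(0, 0.1, fs)  # illustrative stand-in for real audio
mfcc_feat = feature.mfcc(signal, sampling_frequency=fs)
cube = feature.extract_derivative_feature(mfcc_feat)
print(cube.shape)  # (num_frames, 13, 3): static, first, second derivative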
-------------------------------------------------------------------------------- /speechpy/functions.py: --------------------------------------------------------------------------------
1 | """functions module.
2 | 
3 | This module contains the helper functions needed for calculating the
4 | features in the ``feature`` module.
5 | 
6 | 
7 | Attributes:
8 | 
9 |     frequency_to_mel: Converting frequency to the Mel scale.
10 |         This is necessary for filterbank energy calculation.
11 |     mel_to_frequency: Converting the Mel scale to frequency.
12 |         This is necessary for filterbank energy calculation.
13 |     triangle: Creating a triangle function for the filterbanks.
14 |         This is necessary for filterbank energy calculation.
15 |     zero_handling: Handling zero values due to the possible
16 |         issues regarding the log functions.
17 | """
18 | 
19 | from __future__ import division
20 | import numpy as np
21 | 
22 | 
23 | def frequency_to_mel(f):
24 |     """Converting from frequency to the Mel scale.
25 | 
26 |     :param f: The frequency values (or a single frequency) in Hz.
27 |     :returns: The Mel scale values (or a single Mel value).
28 |     """
29 |     return 1127 * np.log(1 + f / 700.)
30 | 
31 | 
32 | def mel_to_frequency(mel):
33 |     """Converting from the Mel scale to frequency.
34 | 
35 |     :param mel: The Mel scale values (or a single Mel value).
36 |     :returns: The frequency values (or a single frequency) in Hz.
37 |     """
38 |     return 700 * (np.exp(mel / 1127.0) - 1)
39 | 
40 | 
41 | def triangle(x, left, middle, right):
42 |     """Evaluate a triangular window on the points in x: zero outside
43 |     [left, right], rising to one at middle and falling back to zero.
44 | 
45 |     :param x: The points at which the triangle is evaluated.
46 |     :param left: The left edge (value zero).
47 |     :param middle: The peak position (value one).
48 |     :param right: The right edge (value zero).
49 |     :returns: The triangle values at the points of x.
50 |     """
51 |     out = np.zeros(x.shape)
52 |     out[x <= left] = 0
53 |     out[x >= right] = 0
54 |     first_half = np.logical_and(left < x, x <= middle)
55 |     out[first_half] = (x[first_half] - left) / (middle - left)
56 |     second_half = np.logical_and(middle <= x, x < right)
57 |     out[second_half] = (right - x[second_half]) / (right - middle)
58 |     return out
59 | 
60 | 
61 | def zero_handling(x):
62 |     """Handle zero values before they are passed to a log function.
63 | 
64 |     :param x: The vector.
65 |     :return: The vector with zeros substituted with epsilon values.
66 |     """
67 |     return np.where(x == 0, np.finfo(float).eps, x)
68 | 
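Since mel_to_frequency is the exact inverse of frequency_to_mel, a quick round-trip check illustrates both (the sample frequencies are arbitrary):

import numpy as np
from speechpy import functions

f = np.array([300.0, 1000.0, 4000.0])     # Hz
mels = functions.frequency_to_mel(f)      # 1127 * ln(1 + f / 700)
back = functions.mel_to_frequency(mels)   # 700 * (exp(mel / 1127) - 1)
print(np.allclose(f, back))               # True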
-------------------------------------------------------------------------------- /speechpy/processing.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Processing module for signal processing operations.
3 | 
4 | This module provides the signal processing functions which are
5 | required as internal computations in the package.
6 | 
7 | 
8 | Attributes:
9 | 
10 |     preemphasis: Pre-emphasizing the signal. This is a preprocessing step.
11 | 
12 |     stack_frames: Create stacked frames from the raw signal.
13 | 
14 |     fft_spectrum: Calculation of the Fast Fourier Transform.
15 | 
16 |     power_spectrum: Power spectrum calculation.
17 | 
18 |     log_power_spectrum: Log power spectrum calculation.
19 | 
20 |     derivative_extraction: Calculation of the derivative of the extracted features.
21 | 
22 |     cmvn: Cepstral mean variance normalization. This is a post-processing operation.
23 | 
24 |     cmvnw: Cepstral mean variance normalization over a sliding window. This is a post-processing operation.
25 | 
26 | """
27 | 
28 | __license__ = "MIT"
29 | __author__ = "Amirsina Torfi"
30 | __docformat__ = 'reStructuredText'
31 | 
32 | import decimal
33 | import numpy as np
34 | import math
35 | 
36 | 
37 | # 1.4 becomes 1 and 1.6 becomes 2. Special case: 1.5 becomes 2.
38 | def round_half_up(number):
39 |     return int(
40 |         decimal.Decimal(number).quantize(
41 |             decimal.Decimal('1'),
42 |             rounding=decimal.ROUND_HALF_UP))
43 | 
44 | 
45 | def preemphasis(signal, shift=1, cof=0.98):
46 |     """Pre-emphasizing the signal.
47 | 
48 |     Args:
49 |         signal (array): The input signal.
50 |         shift (int): The shift step.
51 |         cof (float): The pre-emphasis coefficient. 0 equals no filtering.
52 | 
53 |     Returns:
54 |         array: The pre-emphasized signal.
55 |     """
56 | 
57 |     rolled_signal = np.roll(signal, shift)
58 |     return signal - cof * rolled_signal
59 | 
60 | 
61 | def stack_frames(
62 |         sig,
63 |         sampling_frequency,
64 |         frame_length=0.020,
65 |         frame_stride=0.020,
66 |         filter=lambda x: np.ones(
67 |             (x,)
68 |         ),
69 |         zero_padding=True):
70 |     """Frame a signal into overlapping frames.
71 | 
72 |     Args:
73 |         sig (array): The audio signal to frame, of size (N,).
74 |         sampling_frequency (int): The sampling frequency of the signal.
75 |         frame_length (float): The length of each frame in seconds.
76 |         frame_stride (float): The stride between successive frames in seconds.
77 |         filter (array): The time-domain filter to apply to each frame.
78 |             By default it is all ones, so nothing is changed.
79 |         zero_padding (bool): If the number of samples is not a multiple of
80 |             the frame length (in samples), zero padding is applied to
81 |             generate the last frame.
82 | 
83 |     Returns:
84 |         array: Array of stacked frames of size (number_of_frames x frame_sample_length).
85 | 
86 |     """
87 | 
88 |     # Check dimension
89 |     s = "Signal dimension should be of the format (N,) but it is %s instead"
90 |     assert sig.ndim == 1, s % str(sig.shape)
91 | 
92 |     # Initial necessary values
93 |     length_signal = sig.shape[0]
94 |     frame_sample_length = int(
95 |         np.round(
96 |             sampling_frequency *
97 |             frame_length))  # Defined by the number of samples
98 |     frame_stride = float(np.round(sampling_frequency * frame_stride))
99 | 
100 |     # Zero padding is done for allocating space for the last frame.
101 |     if zero_padding:
102 |         # Calculation of number of frames
103 |         numframes = (int(math.ceil((length_signal
104 |                                     - frame_sample_length) / frame_stride)))
105 | 
106 |         # Zero padding
107 |         len_sig = int(numframes * frame_stride + frame_sample_length)
108 |         additive_zeros = np.zeros((len_sig - length_signal,))
109 |         signal = np.concatenate((sig, additive_zeros))
110 | 
111 |     else:
112 |         # No zero padding! The last frame, which does not have enough
113 |         # samples (remaining samples <= frame_sample_length), is dropped!
114 |         numframes = int(math.floor((length_signal
115 |                                     - frame_sample_length) / frame_stride))
116 | 
117 |         # New length
118 |         len_sig = int((numframes - 1) * frame_stride + frame_sample_length)
119 |         signal = sig[0:len_sig]
120 | 
121 | 
122 |     # Getting the indices of all frames.
123 |     indices = (
124 |         np.tile(np.arange(0, frame_sample_length),
125 |                 (numframes, 1))
126 |         + np.tile(
127 |             np.arange(0, numframes * frame_stride, frame_stride),
128 |             (frame_sample_length, 1),
129 |         ).T
130 |     )
131 |     indices = np.array(indices, dtype=np.int32)
132 | 
133 |     # Extracting the frames based on the allocated indices.
134 |     frames = signal[indices]
135 | 
136 |     # Apply the window function
137 |     window = np.tile(filter(frame_sample_length), (numframes, 1))
138 |     Extracted_Frames = frames * window
139 |     return Extracted_Frames
140 | 
141 | 
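A minimal sketch of the two preprocessing steps together, with the same parameter values the package's own tests use (the one-second noise signal is an assumption for illustration):

import numpy as np
from speechpy import processing

fs = 16000
signal = np.random.normal(0, 0.1, fs)  # 1 s of noise

emphasized = processing.preemphasis(signal, cof=0.98)
frames = processing.stack_frames(emphasized, sampling_frequency=fs,
                                 frame_length=0.020, frame_stride=0.020,
                                 zero_padding=True)
print(frames.shape)  # (49, 320): ceil((16000 - 320) / 320) frames of 320 samples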
142 | def fft_spectrum(frames, fft_points=512):
143 |     """This function computes the one-dimensional n-point discrete Fourier
144 |     Transform (DFT) of a real-valued array by means of an efficient algorithm
145 |     called the Fast Fourier Transform (FFT). Please refer to
146 |     https://docs.scipy.org/doc/numpy/reference/generated/numpy.fft.rfft.html
147 |     for further details.
148 | 
149 |     Args:
150 |         frames (array): The frame array in which each row is a frame.
151 |         fft_points (int): The length of the FFT. If fft_points is greater than frame_len, the frames are zero-padded.
152 | 
153 |     Returns:
154 |         array: The FFT spectrum.
155 |             If frames is a num_frames x sample_per_frame matrix, the output
156 |             is of size num_frames x (fft_points // 2 + 1).
157 |     """
158 |     SPECTRUM_VECTOR = np.fft.rfft(frames, n=fft_points, axis=-1, norm=None)
159 |     return np.absolute(SPECTRUM_VECTOR)
160 | 
161 | 
162 | def power_spectrum(frames, fft_points=512):
163 |     """Power spectrum of each frame.
164 | 
165 |     Args:
166 |         frames (array): The frame array in which each row is a frame.
167 |         fft_points (int): The length of the FFT. If fft_points is greater than frame_len, the frames are zero-padded.
168 | 
169 |     Returns:
170 |         array: The power spectrum.
171 |             If frames is a num_frames x sample_per_frame matrix, the output
172 |             is of size num_frames x (fft_points // 2 + 1).
173 |     """
174 |     return 1.0 / fft_points * np.square(fft_spectrum(frames, fft_points))
175 | 
176 | 
177 | def log_power_spectrum(frames, fft_points=512, normalize=True):
178 |     """Log power spectrum of each frame in frames.
179 | 
180 |     Args:
181 |         frames (array): The frame array in which each row is a frame.
182 |         fft_points (int): The length of the FFT. If fft_points is greater than
183 |             frame_len, the frames are zero-padded.
184 |         normalize (bool): If normalize=True, the log power spectrum
185 |             is normalized so that its maximum is zero.
186 | 
187 |     Returns:
188 |         array: The log power spectrum.
189 |             If frames is a num_frames x sample_per_frame matrix, the output
190 |             is of size num_frames x (fft_points // 2 + 1).
191 |     """
192 |     power_spec = power_spectrum(frames, fft_points)
193 |     power_spec[power_spec <= 1e-20] = 1e-20
194 |     log_power_spec = 10 * np.log10(power_spec)
195 |     if normalize:
196 |         return log_power_spec - np.max(log_power_spec)
197 |     else:
198 |         return log_power_spec
199 | 
200 | 
201 | def derivative_extraction(feat, DeltaWindows):
202 |     """This function computes the derivative features.
203 | 
204 |     Args:
205 |         feat (array): The main feature vector (for returning the second
206 |             order derivative it can be the first-order derivative).
207 |         DeltaWindows (int): The value of DeltaWindows is set using
208 |             the configuration parameter DELTAWINDOW.
209 | 
210 |     Returns:
211 |         array: Derivative feature vector - a NUMFRAMES x NUMFEATURES numpy
212 |             array which is the derivative features along the features.
213 |     """
214 | 
215 |     # Getting the shape of the vector.
216 |     rows, cols = feat.shape
217 | 
218 |     # Defining the vector of differences.
219 |     DIF = np.zeros(feat.shape, dtype=feat.dtype)
220 |     Scale = 0
221 | 
222 |     # Pad only along features in the vector.
223 |     FEAT = np.lib.pad(feat, ((0, 0), (DeltaWindows, DeltaWindows)), 'edge')
224 |     for i in range(DeltaWindows):
225 |         # Start index
226 |         offset = DeltaWindows
227 | 
228 |         # The dynamic range
229 |         Range = i + 1
230 | 
231 |         dif = Range * (FEAT[:, offset + Range:offset + Range + cols]
232 |                        - FEAT[:, offset - Range:offset - Range + cols])
233 |         Scale += 2 * np.power(Range, 2)
234 |         DIF += dif
235 | 
236 |     return DIF / Scale
237 | 
238 | 
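A short sketch chaining the spectrum helpers on stacked frames (the frame matrix shape follows the stack_frames example above; random values are a stand-in for real frames):

import numpy as np
from speechpy import processing

frames = np.random.normal(0, 0.1, (49, 320))   # stand-in for stacked frames
power = processing.power_spectrum(frames, fft_points=512)
log_power = processing.log_power_spectrum(frames, fft_points=512,
                                          normalize=True)
print(power.shape)       # (49, 257): 512 // 2 + 1 rFFT bins per frame
print(log_power.max())   # 0.0, since normalization subtracts the maximum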
239 | def cmvn(vec, variance_normalization=False):
240 |     """This function performs global cepstral mean and
241 |     variance normalization (CMVN) on the input feature vector "vec".
242 |     The code assumes that there is one observation per row.
243 | 
244 |     Args:
245 |         vec (array): input feature matrix
246 |             (size: (num_observation, num_features))
247 |         variance_normalization (bool): If the variance
248 |             normalization should be performed or not.
249 | 
250 |     Return:
251 |         array: The mean (or mean+variance) normalized feature vector.
252 |     """
253 |     eps = 2**-30
254 |     rows, cols = vec.shape
255 | 
256 |     # Mean calculation
257 |     norm = np.mean(vec, axis=0)
258 |     norm_vec = np.tile(norm, (rows, 1))
259 | 
260 |     # Mean subtraction
261 |     mean_subtracted = vec - norm_vec
262 | 
263 |     # Variance normalization
264 |     if variance_normalization:
265 |         stdev = np.std(mean_subtracted, axis=0)
266 |         stdev_vec = np.tile(stdev, (rows, 1))
267 |         output = mean_subtracted / (stdev_vec + eps)
268 |     else:
269 |         output = mean_subtracted
270 | 
271 |     return output
272 | 
273 | 
274 | def cmvnw(vec, win_size=301, variance_normalization=False):
275 |     """This function performs local cepstral mean and
276 |     variance normalization (CMVN) over a sliding window. The code assumes
277 |     that there is one observation per row.
278 | 
279 |     Args:
280 |         vec (array): input feature matrix
281 |             (size: (num_observation, num_features))
282 |         win_size (int): The size of the sliding window for local
283 |             normalization. Default=301, which is around 3 s if a 100 Hz
284 |             frame rate is considered (i.e., a 10 ms frame stride).
285 |         variance_normalization (bool): If the variance normalization should
286 |             be performed or not.
287 | 
288 |     Return:
289 |         array: The mean (or mean+variance) normalized feature vector.
290 |     """
291 |     # Get the shapes
292 |     eps = 2**-30
293 |     rows, cols = vec.shape
294 | 
295 |     # Window size must be odd.
296 |     assert isinstance(win_size, int), "Size must be of type 'int'!"
297 |     assert win_size % 2 == 1, "Window size must be odd!"
298 | 
299 |     # Padding and initial definitions
300 |     pad_size = int((win_size - 1) / 2)
301 |     vec_pad = np.lib.pad(vec, ((pad_size, pad_size), (0, 0)), 'symmetric')
302 |     mean_subtracted = np.zeros(np.shape(vec), dtype=np.float32)
303 | 
304 |     for i in range(rows):
305 |         window = vec_pad[i:i + win_size, :]
306 |         window_mean = np.mean(window, axis=0)
307 |         mean_subtracted[i, :] = vec[i, :] - window_mean
308 | 
309 |     # Variance normalization
310 |     if variance_normalization:
311 | 
312 |         # Initial definitions.
313 |         variance_normalized = np.zeros(np.shape(vec), dtype=np.float32)
314 |         vec_pad_variance = np.lib.pad(
315 |             mean_subtracted, ((pad_size, pad_size), (0, 0)), 'symmetric')
316 | 
317 |         # Looping over all observations.
318 |         for i in range(rows):
319 |             window = vec_pad_variance[i:i + win_size, :]
320 |             window_variance = np.std(window, axis=0)
321 |             variance_normalized[i, :] \
322 |                 = mean_subtracted[i, :] / (window_variance + eps)
323 |         output = variance_normalized
324 |     else:
325 |         output = mean_subtracted
326 | 
327 |     return output
328 | 
329 | 
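A minimal sketch of both normalizers on a random feature matrix (the 50 x 100 shape and the 11-sample window are illustrative assumptions; the cmvn call mirrors the package's own tests):

import numpy as np
from speechpy import processing

feats = np.random.rand(50, 100)                # (num_observations, num_features)
global_norm = processing.cmvn(feats, variance_normalization=True)
local_norm = processing.cmvnw(feats, win_size=11, variance_normalization=True)

print(np.allclose(global_norm.mean(axis=0), 0.0))  # True: zero mean per feature
print(np.allclose(global_norm.std(axis=0), 1.0))   # True: ~unit variance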
330 | # def resample_Fn(wave, fs, f_new=16000):
331 | #     """This function resamples the data to an arbitrary frequency.
332 | #     :param fs: Frequency of the sound file.
333 | #     :param wave: The sound file itself.
334 | #     :returns:
335 | #         f_new: The new frequency.
336 | #         signal_new: The new signal samples at the new frequency.
337 | #
338 | #     dependency: from scikits.samplerate import resample
339 | #     """
340 | #
341 | #     # Resampling using interpolation (there are other
342 | #     # methods than 'sinc_best')
343 | #     signal_new = resample(wave, float(f_new) / fs, 'sinc_best')
344 | #
345 | #     # Necessary data conversion for saving the .wav file using scipy.
346 | #     signal_new = np.asarray(signal_new, dtype=np.int16)
347 | #
348 | #     # # Uncomment if you want to save the audio file
349 | #     # # Save using the new format
350 | #     # wav.write(filename='resample_rainbow_16k.wav', rate=f_new, data=signal_new)
351 | #     return signal_new, f_new
352 | 
-------------------------------------------------------------------------------- /tests/Alesis-Sanctuary-QCard-AcoustcBas-C2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astorfi/speechpy/b317746f042d02e03124518625be6d9327b71e4a/tests/Alesis-Sanctuary-QCard-AcoustcBas-C2.wav
-------------------------------------------------------------------------------- /tests/test_speechpy.py: --------------------------------------------------------------------------------
1 | import scipy.io.wavfile as wav
2 | import numpy as np
3 | import os
4 | import sys
5 | lib_path = os.path.abspath(os.path.join('..'))
6 | print(lib_path)
7 | sys.path.append(lib_path)
8 | from speechpy import processing
9 | from speechpy import feature
10 | from speechpy import functions
11 | 
12 | # Random signal generation for testing
13 | mu, sigma = 0, 0.1  # mean and standard deviation
14 | signal = np.random.normal(mu, sigma, 1000000)
15 | fs = 16000
16 | 
17 | # Generating stacked frames with SpeechPy
18 | frame_length = 0.02
19 | frame_stride = 0.02
20 | num_filters = 40
21 | 
22 | 
23 | class Test_Methods_Exists(object):
24 |     def test_processing(self):
25 | 
26 |         # Checking the availability of functions in the chosen module
27 |         assert hasattr(processing, 'preemphasis')
28 |         assert hasattr(processing, 'stack_frames')
29 |         assert hasattr(processing, 'fft_spectrum')
30 |         assert hasattr(processing, 'power_spectrum')
31 |         assert hasattr(processing, 'log_power_spectrum')
32 |         assert hasattr(processing, 'derivative_extraction')
33 |         assert hasattr(processing, 'cmvn')
34 |         assert hasattr(processing, 'cmvnw')
35 | 
36 |     def test_feature(self):
37 | 
38 |         # Checking the availability of functions in the chosen module
39 |         assert hasattr(feature, 'filterbanks')
40 |         assert hasattr(feature, 'mfcc')
41 |         assert hasattr(feature, 'mfe')
42 |         assert hasattr(feature, 'lmfe')
43 |         assert hasattr(feature, 'extract_derivative_feature')
44 | 
45 |     def test_functions(self):
46 | 
47 |         # Checking the availability of functions in the chosen module
48 |         assert hasattr(functions, 'frequency_to_mel')
49 |         assert hasattr(functions, 'mel_to_frequency')
50 |         assert hasattr(functions, 'triangle')
51 |         assert hasattr(functions, 'zero_handling')
52 | 
53 | 
54 | class Test_Processing(object):
55 | 
56 |     def test_preemphasis(self):
57 | 
58 |         # Performing the operation on the generated signal.
59 |         signal_preemphasized = processing.preemphasis(signal, cof=0.98)
60 | 
61 |         # Shape matcher
62 |         assert signal_preemphasized.ndim == 1
63 |         assert signal_preemphasized.shape == signal.shape
64 | 
65 |     def test_stack_frames(self):
66 | 
67 |         frames = processing.stack_frames(signal, sampling_frequency=fs,
68 |                                          frame_length=frame_length,
69 |                                          frame_stride=frame_stride,
70 |                                          filter=lambda x: np.ones((x,)),
71 |                                          zero_padding=True)
72 | 
73 |         # Direct calculation using numpy
74 |         window = int(np.round(frame_length * fs))
75 |         step = int(np.round(frame_stride * fs))
76 |         all_frames = int(np.ceil((signal.shape[0] - window) / step))
77 | 
78 |         # Shape matching of stacked frames
79 |         assert all_frames == frames.shape[0]
80 | 
81 |     def test_cmvn(self):
82 | 
83 |         feature_vector = np.random.rand(50, 100)
84 |         normalized_feature = processing.cmvn(
85 |             feature_vector, variance_normalization=True)
86 | 
87 |         # Shape match
88 |         assert normalized_feature.shape == feature_vector.shape
89 | 
90 |         # Check the std and mean of the output vector
91 |         assert np.allclose(np.mean(normalized_feature, axis=0),
92 |                            np.zeros((1, normalized_feature.shape[1])))
93 |         assert np.allclose(np.std(normalized_feature, axis=0),
94 |                            np.ones((1, normalized_feature.shape[1])))
95 | 
96 | 
97 | class Test_feature(object):
98 | 
99 |     def test_mfcc(self):
100 | 
101 |         num_cepstral = 13
102 |         mfcc = feature.mfcc(signal, sampling_frequency=fs,
103 |                             frame_length=0.020, num_cepstral=num_cepstral,
104 |                             frame_stride=0.01, num_filters=num_filters,
105 |                             fft_length=512, low_frequency=0,
106 |                             high_frequency=None)
107 | 
108 |         # Shape matcher
109 |         assert mfcc.shape[1] == num_cepstral
110 | 
--------------------------------------------------------------------------------
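As a closing illustration, a sketch tying the modules together on the bundled test recording (the mono/stereo check is an assumption about the file; everything else follows the APIs above):

import scipy.io.wavfile as wav
import numpy as np
from speechpy import feature, processing

fs, signal = wav.read('tests/Alesis-Sanctuary-QCard-AcoustcBas-C2.wav')
if signal.ndim > 1:        # keep one channel if the file happens to be stereo
    signal = signal[:, 0]

mfcc_feat = feature.mfcc(signal, sampling_frequency=fs, frame_length=0.020,
                         frame_stride=0.01, num_cepstral=13,
                         num_filters=40, fft_length=512)
mfcc_cmvn = processing.cmvn(mfcc_feat, variance_normalization=True)
mfcc_cube = feature.extract_derivative_feature(mfcc_feat)
print(mfcc_feat.shape, mfcc_cmvn.shape, mfcc_cube.shape)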