├── .codecov.yml ├── .coveragerc ├── .coveralls.yml ├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── appveyor.yml ├── docs ├── Makefile ├── icon.png ├── make.bat ├── requirements.txt └── source │ ├── _static │ ├── creativecommons.png │ ├── css │ │ └── custom.css │ ├── favicon_io │ │ ├── android-chrome-192x192.png │ │ ├── android-chrome-512x512.png │ │ ├── apple-touch-icon.png │ │ ├── favicon-16x16.png │ │ ├── favicon-32x32.png │ │ ├── favicon.ico │ │ └── site.webmanifest │ ├── logo.png │ └── logo1.png │ ├── _templates │ ├── footer.html │ ├── hello.html │ ├── layout.html │ ├── navbar-nav.html │ ├── navbar_version.html │ └── sidebar-nav-bs.html │ ├── auga.rst │ ├── augf.rst │ ├── augt.rst │ ├── code.rst │ ├── conf.py │ ├── filters.rst │ ├── index.rst │ └── io.rst ├── language.json ├── paper ├── Makefile ├── README.md ├── assets │ ├── apa.csl │ ├── latex.template │ └── logo.png ├── experiment │ ├── README.md │ ├── _1_get_data.py │ ├── _2_gen_augmentations.py │ ├── _3_get_features.py │ ├── _4_run_experiment.py │ ├── results.txt │ ├── sounds │ │ ├── ir_classroom.wav │ │ ├── ir_smartphone_mic.wav │ │ ├── tel_noise.wav │ │ └── white_noise.wav │ └── utils │ │ ├── classifiers_config.py │ │ ├── cmat.py │ │ └── dataproc.py ├── figs │ ├── bpass.png │ ├── hpass.png │ └── lpass.png ├── paper.bib ├── paper.md ├── paper.pdf └── refs.md ├── pydiogment ├── __init__.py ├── auga.py ├── augf.py ├── augt.py └── utils │ ├── __init__.py │ ├── filters.py │ └── io.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── test_auga.py ├── test_augf.py ├── test_augt.py ├── test_utils.py └── testfiles ├── tel_noise.wav ├── test.wav ├── test_augmented_-100_noisy.wav ├── test_augmented_-20_noisy.wav ├── test_augmented_-3_noisy.wav ├── test_augmented_-50_noisy.wav ├── test_augmented_-6_noisy.wav ├── test_augmented_0.9_pass_filtered.wav ├── test_augmented_0.9_toned.wav ├── test_augmented_1.1_pass_filtered.wav ├── test_augmented_1.1_toned.wav ├── test_augmented_fade_in_out.wav ├── test_augmented_left_1_shifted.wav ├── test_augmented_peak_normalized.wav ├── test_augmented_randomly_cropped_1.wav ├── test_augmented_resampled_to_16000.wav ├── test_augmented_resampled_to_4000.wav ├── test_augmented_resampled_to_6000.wav ├── test_augmented_resampled_to_9000.wav ├── test_augmented_reversed.wav ├── test_augmented_right_1_shifted.wav ├── test_augmented_rms_normalized.wav ├── test_augmented_slowed.wav ├── test_augmented_speeded.wav ├── test_augmented_tel_noise_convolved_with_level_0.01.wav ├── test_augmented_tel_noise_convolved_with_level_0.25.wav ├── test_augmented_tel_noise_convolved_with_level_0.5.wav ├── test_augmented_with_-100_gain.wav ├── test_augmented_with_-25_gain.wav ├── test_augmented_with_-50_gain.wav └── test_augmented_without_silence.wav /.codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | notify: 3 | require_ci_to_pass: yes 4 | bot: Pydiogment-codecov-bot 5 | 6 | coverage: 7 | precision: 2 8 | round: down 9 | range: "70...100" 10 | 11 | status: 12 | project: yes 13 | patch: yes 14 | changes: no 15 | 16 | parsers: 17 | gcov: 18 | branch_detection: 19 | conditional: yes 20 | loop: yes 21 | method: no 22 | macro: no 23 | 24 | comment: 25 | layout: "header, diff" 26 | behavior: default 27 | require_changes: no 28 | 29 | ignore: 30 | - "setup.py" 31 | - "**/*.md" 32 | -------------------------------------------------------------------------------- /.coveragerc: 
-------------------------------------------------------------------------------- 1 | # .coveragerc 2 | [report] 3 | show_missing = True 4 | omit = 5 | paper/* 6 | setup.py 7 | *.md 8 | -------------------------------------------------------------------------------- /.coveralls.yml: -------------------------------------------------------------------------------- 1 | service_name: travis-pro 2 | repo_token: SpTvumnGHOE9XKc5K9waBJ1NprJ6iiBRM 3 | parallel: true # if the CI is running your build in parallel -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | .pypirc 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | 108 | # models 109 | paper/experiment/models 110 | paper/experiment/data 111 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Build documentation in the docs/ directory with Sphinx 8 | sphinx: 9 | configuration: docs/source/conf.py 10 | 11 | python: 12 | version: 3.6 13 | install: 14 | - requirements: docs/requirements.txt 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | - "3.6" 5 | - "3.7" 6 | 7 | cache: 8 | - pip 9 | 10 | # command to install dependencies 11 | before_script: 12 | - sudo apt-get update 13 | - sudo apt-get install python3 14 | - sudo apt-get install python3-pip 15 | - sudo apt-get install python3-setuptools 16 | - pip3 install --upgrade 
setuptools
17 | - pip3 install pytest
18 | - pip3 install pytest-cov pytest
19 | - pip3 install codecov
20 | - pip3 install coveralls
21 | - pip3 install pytest-xdist
22 | - pip3 install codacy-coverage
23 | - sudo apt-get update
24 | - pip3 install matplotlib
25 | - pip3 install -r requirements.txt
26 | - sudo apt install ffmpeg
27 | 
28 | # command to run tests
29 | script:
30 | - pytest --cache-clear -n 8 -q --cov=./
31 | 
32 | after_success:
33 | - codecov
34 | - coveralls
35 | - coverage xml
36 | 
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: --------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 | 
3 | ## Our Pledge
4 | 
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language.
18 | * Being respectful of differing viewpoints and experiences.
19 | * Gracefully accepting constructive criticism.
20 | * Focusing on what is best for the community.
21 | * Showing empathy towards other community members.
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances.
27 | * Trolling, insulting/derogatory comments, and personal or political attacks.
28 | * Public or private harassment.
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission.
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting.
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at superkogito@gmail.com.
All 59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available [here](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html).
72 | 
73 | [homepage]: https://www.contributor-covenant.org
74 | 
75 | For answers to common questions about this code of conduct, please refer to this [link](https://www.contributor-covenant.org/faq).
76 | 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: --------------------------------------------------------------------------------
1 | Contributing guidelines
2 | =======================
3 | 
4 | How to contribute
5 | -----------------
6 | 
7 | The preferred way to contribute to pydiogment is to fork the [main repository](https://github.com/SuperKogito/pydiogment) on GitHub:
8 | 
9 | 1. Fork the [project repository](https://github.com/SuperKogito/pydiogment): click on the 'Fork' button near the top of the page. This creates a copy of the code under your account on the GitHub server.
10 | 
11 | 2. Clone this copy to your local disk:
12 | 
13 | - Using SSH:
14 | 
15 | ```bash
16 | git clone git@github.com:YourLogin/pydiogment.git
17 | cd pydiogment
18 | ```
19 | 
20 | - Using HTTPS:
21 | 
22 | ```bash
23 | git clone https://github.com/YourLogin/pydiogment.git
24 | cd pydiogment
25 | ```
26 | 
27 | 3. Remove any previously installed pydiogment versions, then install your local copy:
28 | 
29 | ```bash
30 | pip uninstall pydiogment
31 | pip install .
32 | ```
33 | 
34 | 4. Create a branch to hold your changes:
35 | 
36 | ```bash
37 | git checkout -b my-feature
38 | ```
39 | 
40 | 5. Start making changes.
41 | 
42 | - Please never work directly on the `master` branch!
43 | 
44 | 6. Use Git to version-control this copy. When you're done editing, follow the usual `add`, `commit`, then `push` routine:
45 | 
46 | ```bash
47 | git add modified_files
48 | git commit
49 | ```
50 | 
51 | to record your changes in Git, then push them to GitHub with:
52 | 
53 | ```bash
54 | git push -u origin my-feature
55 | ```
56 | 
57 | 7. Finally, go to the web page of your pydiogment fork and click the 'Pull request' button to send your changes to the maintainers for review.
58 | 
59 | Remarks
60 | -------
61 | 
62 | It is recommended to check that your contribution complies with the following rules before submitting a pull request:
63 | 
64 | - All public methods should have informative docstrings with sample usage presented, as in the sketch below.
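
  For instance, a docstring in that spirit could look like the following sketch (the wording is illustrative; `apply_gain` and its signature are taken from the usage examples in the README):

  ```python
  def apply_gain(infile, gain):
      """
      Apply a gain (in dB) to an audio file and save the result as an
      augmented copy next to the input file.

      Args:
          infile (str) : path to the input mono WAV file.
          gain (float) : gain to apply, in dB (negative values attenuate).

      Example:
          >>> from pydiogment.auga import apply_gain
          >>> apply_gain("path/test.wav", -20)
      """
  ```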
65 | 
66 | You can also check test coverage and common programming errors with the following tools:
67 | 
68 | - Code with good test coverage (at least 80%), check with:
69 | 
70 | ```bash
71 | pytest
72 | ```
73 | 
74 | - No pyflakes warnings, check with:
75 | 
76 | ```bash
77 | pip install pyflakes
78 | pyflakes path/to/module.py
79 | ```
80 | 
81 | - No PEP8 warnings, check with:
82 | 
83 | ```bash
84 | pip install pycodestyle
85 | pycodestyle path/to/module.py
86 | ```
87 | 
88 | - autopep8 and yapf can help you automatically fix some of the simple recurring errors and tidy up your code:
89 | 
90 | ```bash
91 | pip install autopep8
92 | pip install yapf
93 | 
94 | 
95 | autopep8 path/to/module.py
96 | yapf path/to/module.py
97 | ```
98 | 
99 | Filing bugs
100 | -----------
101 | 
102 | We use GitHub issues to track all bugs and feature requests. If you come across a bug, have a question, or want to suggest a feature, please feel free to open an issue. However, please make sure that your issue complies with our rules and follows the provided templates:
103 | 
104 | - [bug reports template](https://github.com/SuperKogito/pydiogment/blob/master/.github/ISSUE_TEMPLATE/bug_report.md)
105 | 
106 | - [feature requests template](https://github.com/SuperKogito/pydiogment/blob/master/.github/ISSUE_TEMPLATE/feature_request.md)
107 | 
108 | In addition, please check that your issue complies with the following rules before submitting:
109 | 
110 | - Verify that your issue is not currently being addressed by other [issues](https://github.com/SuperKogito/pydiogment/issues) or [pull requests](https://github.com/SuperKogito/pydiogment/pulls).
111 | 
112 | - Please ensure all code snippets and error messages are formatted appropriately. See [Creating and highlighting code blocks](https://help.github.com/articles/creating-and-highlighting-code-blocks).
113 | 
114 | - Please include your operating system type and version number, as well as your Python, pydiogment, numpy, and scipy versions. This information can be found by running the following code snippet:
115 | 
116 | ```python
117 | import sys
118 | import numpy
119 | import scipy
120 | import pydiogment
121 | import platform
122 | 
123 | 
124 | print(platform.platform())
125 | print("Python", sys.version)
126 | print("NumPy", numpy.__version__)
127 | print("SciPy", scipy.__version__)
128 | print("pydiogment", pydiogment.__version__)
129 | ```
130 | 
131 | Documentation
132 | -------------
133 | 
134 | You can edit the documentation using any text editor and then generate the HTML output by typing `make html` from the docs/ directory. The resulting HTML files will be placed in `_build/html/` and are viewable in a web browser. See the README file in the docs/ directory for more information.
135 | 
136 | To build the documentation, you will need:
137 | 
138 | - [sphinx](http://sphinx.pocoo.org/).
139 | - [sphinxcontrib-napoleon](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/).
140 | 
141 | Note
142 | ----
143 | 
144 | This document was based on the [scikit-learn](http://scikit-learn.org/) & [librosa](https://github.com/librosa/librosa) contribution guides.
145 | 
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 | 
3 | Copyright (c) 2019, Ayoub Malek
4 | All rights reserved.
5 | 
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 | 
9 | 1.
Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 
5 | :bell: Pydiogment
6 | ==========
7 | 
8 | [![Build Status](https://travis-ci.org/SuperKogito/pydiogment.svg?branch=master)](https://travis-ci.org/SuperKogito/pydiogment) [![Build status](https://ci.appveyor.com/api/projects/status/bnxaa6dw82cyhl5h?svg=true)](https://ci.appveyor.com/project/SuperKogito/pydiogment) [![Documentation Status](https://readthedocs.org/projects/pydiogment/badge/?version=latest)](https://pydiogment.readthedocs.io/en/latest/?badge=latest) [![License](https://img.shields.io/badge/license-BSD%203--Clause%20License%20(Revised)%20-blue)](https://github.com/SuperKogito/pydiogment/blob/master/LICENSE) [![Python](https://img.shields.io/badge/python-3.5%20%7C%203.6%20%7C%203.7-blue)](https://www.python.org/doc/versions/) [![Coverage Status](https://codecov.io/gh/SuperKogito/pydiogment/graph/badge.svg)](https://codecov.io/gh/SuperKogito/pydiogment) [![Coverage Status](https://coveralls.io/repos/github/SuperKogito/pydiogment/badge.svg?branch=master)](https://coveralls.io/github/SuperKogito/pydiogment?branch=master) [![CodeFactor](https://www.codefactor.io/repository/github/superkogito/pydiogment/badge/master)](https://www.codefactor.io/repository/github/superkogito/pydiogment/overview/master)
9 | 
10 | **Pydiogment** aims to simplify audio augmentation. It generates multiple audio files based on a starting mono audio file. The library can generate sped-up and slowed-down versions of the input, copies with different tones, and more.
11 | 
12 | :inbox_tray: Installation
13 | ============
14 | 
15 | Dependencies
16 | ------------
17 | 
18 | **Pydiogment** requires:
19 | 
20 | - [Python](https://www.python.org/download/releases/3.0/) (>= 3.5)
21 | - [NumPy](https://numpy.org/) (>= 1.17.2)
22 | 
23 | - [SciPy](https://www.scipy.org/) (>= 1.3.1)
24 | 
25 | - [FFmpeg](https://www.ffmpeg.org/)
26 | 
27 | ### On Linux
28 | On Linux you can use the following commands to install the libraries:
29 | - NumPy: `pip install numpy`
30 | - SciPy: `pip install scipy`
31 | - FFmpeg: `sudo apt install ffmpeg`
32 | 
33 | ### On Windows
34 | On Windows you can use the following installation binaries:
35 | - NumPy: https://www.lfd.uci.edu/~gohlke/pythonlibs/#numpy, or if you already have Python installed, you can install it using `pip3 install numpy`
36 | - SciPy: https://www.lfd.uci.edu/~gohlke/pythonlibs/#scipy
37 | - FFmpeg: https://ffmpeg.org/download.html#build-windows
38 | 
39 | ### On macOS
40 | On macOS, use Homebrew to install the packages:
41 | - NumPy: `brew install numpy --with-python3`
42 | - SciPy: you first need to install a Fortran compiler such as gfortran via Homebrew (`brew install gfortran`); once that is done, install SciPy with `pip install scipy`. For
43 | more information and guidelines, see: https://github.com/scipy/scipy/blob/master/INSTALL.rst.txt#mac-os-x
44 | - FFmpeg: `brew install ffmpeg`
45 | 
46 | 
47 | Installation
48 | -------------
49 | If you already have a working installation of [NumPy](https://numpy.org/) and [SciPy](https://www.scipy.org/), you can simply install **Pydiogment** using pip:
50 | 
51 | ```
52 | pip install pydiogment
53 | ```
54 | To update an existing version of **Pydiogment**, use:
55 | ```
56 | pip install -U pydiogment
57 | ```
58 | 
59 | :bulb: How to use
60 | ==========
61 | 
62 | - ## Amplitude related augmentation
63 | - ### Apply a fade in and fade out effect
64 | ```python3
65 | from pydiogment.auga import fade_in_and_out
66 | 
67 | test_file = "path/test.wav"
68 | fade_in_and_out(test_file)
69 | ```
70 | 
71 | - ### Apply gain to file
72 | ```python3
73 | from pydiogment.auga import apply_gain
74 | 
75 | test_file = "path/test.wav"
76 | apply_gain(test_file, -100)
77 | apply_gain(test_file, -50)
78 | ```
79 | 
80 | - ### Add Random Gaussian Noise based on SNR to file
81 | ```python3
82 | from pydiogment.auga import add_noise
83 | 
84 | test_file = "path/test.wav"
85 | add_noise(test_file, 10)
86 | ```
87 | 
88 | 
89 | - ## Frequency related augmentation
90 | - ### Change file tone
91 | ```python3
92 | from pydiogment.augf import change_tone
93 | 
94 | test_file = "path/test.wav"
95 | change_tone(test_file, 0.9)
96 | change_tone(test_file, 1.1)
97 | ```
98 | 
99 | - ## Time related augmentation
100 | - ### Slow down / speed up the file
101 | ```python3
102 | from pydiogment.augt import slowdown, speed
103 | 
104 | test_file = "path/test.wav"
105 | slowdown(test_file, 0.8)
106 | speed(test_file, 1.2)
107 | ```
108 | 
109 | - ### Apply random cropping to the file
110 | ```python3
111 | from pydiogment.augt import random_cropping
112 | 
113 | test_file = "path/test.wav"
114 | random_cropping(test_file, 1)
115 | ```
116 | 
117 | - ### Shift the data along the time axis in a given direction
118 | ```python3
119 | from pydiogment.augt import shift_time
120 | 
121 | test_file = "path/test.wav"
122 | shift_time(test_file, 1, "right")
123 | shift_time(test_file, 1, "left")
124 | ```
125 | - ## Audio file format
126 | This library currently supports mono WAV files only. A combined example that chains the functions above is given at the end of this README.
127 | 
128 | :bookmark_tabs: Documentation
129 | ==============
130 | Thorough documentation of the library is available under [pydiogment.readthedocs.io](https://pydiogment.readthedocs.io/en/latest/index.html).
131 | 
132 | :construction_worker: Contributing and bug reports
133 | ============
134 | 
135 | Contributions are welcome and encouraged. To learn more about how to contribute to **Pydiogment**, please refer to the [Contributing guidelines](https://github.com/SuperKogito/pydiogment/blob/master/CONTRIBUTING.md).
136 | 
137 | To report bugs, request a feature, or just ask for help, you can refer to the [issues](https://github.com/SuperKogito/pydiogment/issues) section.
138 | Before reporting a bug, please make sure it is not addressed by an older issue, and make sure to add your operating system type, its version number, and the versions of the dependencies used.
139 | 
140 | :tada: Acknowledgment and credits
141 | ============================
142 | - The test file used in the pytest suite is [OSR_us_000_0060_8k.wav](https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0060_8k.wav) from the [Open Speech Repository](https://www.voiptroubleshooter.com/open_speech/american.html).
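
:wrench: Putting it all together
============

The sketch below chains the calls documented in the *How to use* section over a folder of WAV files. The folder path and the loop are illustrative; the functions, signatures, and argument values are exactly the ones shown above, and each call writes its augmented copy next to the input file (compare the `test_augmented_*.wav` names under `tests/testfiles`).

```python
import os

from pydiogment.auga import add_noise, apply_gain, fade_in_and_out
from pydiogment.augf import change_tone
from pydiogment.augt import random_cropping, shift_time, slowdown, speed

folder = "path"  # placeholder: a directory containing mono WAV files

for fname in os.listdir(folder):
    if not fname.endswith(".wav"):
        continue
    wav = os.path.join(folder, fname)

    # amplitude-based augmentations
    fade_in_and_out(wav)         # fade-in and fade-out effect
    apply_gain(wav, -20)         # apply a -20 dB gain
    add_noise(wav, 10)           # add random Gaussian noise based on SNR

    # frequency-based augmentations
    change_tone(wav, 0.9)        # lower tone
    change_tone(wav, 1.1)        # higher tone

    # time-based augmentations
    slowdown(wav, 0.8)           # slowed-down copy
    speed(wav, 1.2)              # sped-up copy
    random_cropping(wav, 1)      # random cropping
    shift_time(wav, 1, "right")  # shift data along the time axis
```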
143 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | branches: 2 | only: 3 | - master 4 | 5 | environment: 6 | matrix: 7 | - PYTHON_VERSION: 3.5 8 | MINICONDA: C:\Miniconda 9 | - PYTHON_VERSION: 3.6 10 | MINICONDA: C:\Miniconda3 11 | - PYTHON_VERSION: 3.7 12 | MINICONDA: C:\Miniconda3 13 | 14 | cache: 15 | - "%MINICONDA%\\envs -> appveyor.yml, setup.py" 16 | 17 | init: 18 | - "ECHO %PYTHON_VERSION% %MINICONDA%" 19 | 20 | install: 21 | - "set PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%" 22 | - conda config --set always_yes yes --set changeps1 no 23 | - conda update -q conda 24 | - conda info -a 25 | - "conda create -q -n test-environment python=%PYTHON_VERSION% numpy scipy nose" 26 | - activate test-environment 27 | - cinst ffmpeg 28 | - pip install --upgrade setuptools 29 | - pip install pytest 30 | - pip install pytest-cov pytest 31 | - pip install codecov 32 | - pip install coveralls 33 | - pip install pytest-xdist 34 | - pip install codacy-coverage 35 | - pip install matplotlib 36 | - pip install -r requirements.txt 37 | 38 | build: off 39 | test_script: 40 | - pytest 41 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/docs/icon.png -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | 2 | sphinxcontrib-napoleon==0.6.1 3 | numpy 4 | scipy 5 | -------------------------------------------------------------------------------- /docs/source/_static/creativecommons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/docs/source/_static/creativecommons.png -------------------------------------------------------------------------------- /docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* Provided by Sphinx's 'basic' theme, and included in the final set of assets */ 2 | @import "../basic.css"; 3 | @import url('https://fonts.googleapis.com/css2?family=Ubuntu+Mono&display=swap'); 4 | 5 | :root { 6 | /***************************************************************************** 7 | * Theme config 8 | **/ 9 | --pst-header-height: 30px; 10 | 11 | /***************************************************************************** 12 | * Font size 13 | **/ 14 | --pst-font-size-base: 12px; /* base font size - applied at body / html level */ 15 | 16 | /* heading font sizes */ 17 | --pst-font-size-h1: 24px; 18 | --pst-font-size-h2: 22px; 19 | --pst-font-size-h3: 20px; 20 | --pst-font-size-h4: 18px; 21 | --pst-font-size-h5: 16px; 22 | --pst-font-size-h6: 14px; 23 | 24 | /* smaller then heading font sizes*/ 25 | --pst-font-size-milli: 10px; 26 | 27 | --pst-sidebar-font-size: 0.9em; 28 | --pst-sidebar-caption-font-size: 0.9em; 29 | 30 | /***************************************************************************** 31 | * Font family 32 | **/ 33 | /* These are adapted from https://systemfontstack.com/ */ 34 | --pst-font-family-base-system: -apple-system, BlinkMacSystemFont, Segoe UI, 35 | "Helvetica Neue", Arial, sans-serif, Apple Color Emoji, Segoe UI Emoji, 36 | Segoe UI Symbol; 37 | --pst-font-family-monospace-system: "SFMono-Regular", Menlo, Consolas, Monaco, 38 | Liberation Mono, Lucida Console, monospace; 39 | 40 | 41 | 42 | --pst-font-family-base-system: 'Ubuntu Mono', monospace; 43 | --pst-font-family-monospace-system: 'Ubuntu Mono', monospace; 44 | 45 | --pst-font-family-base: var(--pst-font-family-base-system); 46 | --pst-font-family-heading: var(--pst-font-family-base); 47 | --pst-font-family-monospace: var(--pst-font-family-monospace-system); 48 | 49 | /***************************************************************************** 50 | * Color 51 | * 52 | * Colors are defined in rgb string way, "red, green, blue" 53 | **/ 54 | --pst-color-primary: 19, 6, 84; 55 | --pst-color-success: 40, 167, 69; 56 | --pst-color-info: 0, 123, 255; /*23, 162, 184;*/ 57 | --pst-color-warning: 255, 193, 7; 58 | --pst-color-danger: 220, 53, 69; 59 | --pst-color-text-base: 51, 51, 51; 60 | 61 | --pst-color-h1: var(--pst-color-primary); 62 | --pst-color-h2: var(--pst-color-primary); 63 | --pst-color-h3: var(--pst-color-text-base); 64 | --pst-color-h4: var(--pst-color-text-base); 65 | --pst-color-h5: 
var(--pst-color-text-base); 66 | --pst-color-h6: var(--pst-color-text-base); 67 | --pst-color-paragraph: var(--pst-color-text-base); 68 | --pst-color-link: 0, 91, 129; 69 | --pst-color-link-hover: 227, 46, 0; 70 | --pst-color-headerlink: 198, 15, 15; 71 | --pst-color-headerlink-hover: 255, 255, 255; 72 | --pst-color-preformatted-text: 34, 34, 34; 73 | --pst-color-preformatted-background: 250, 250, 250; 74 | --pst-color-inline-code: 232, 62, 140; 75 | 76 | --pst-color-active-navigation: 19, 6, 84; 77 | --pst-color-navbar-link: 77, 77, 77; 78 | --pst-color-navbar-link-hover: var(--pst-color-active-navigation); 79 | --pst-color-navbar-link-active: var(--pst-color-active-navigation); 80 | --pst-color-sidebar-link: 77, 77, 77; 81 | --pst-color-sidebar-link-hover: var(--pst-color-active-navigation); 82 | --pst-color-sidebar-link-active: var(--pst-color-active-navigation); 83 | --pst-color-sidebar-expander-background-hover: 244, 244, 244; 84 | --pst-color-sidebar-caption: 77, 77, 77; 85 | --pst-color-toc-link: 119, 117, 122; 86 | --pst-color-toc-link-hover: var(--pst-color-active-navigation); 87 | --pst-color-toc-link-active: var(--pst-color-active-navigation); 88 | 89 | /***************************************************************************** 90 | * Icon 91 | **/ 92 | 93 | /* font awesome icons*/ 94 | --pst-icon-check-circle: "\f058"; 95 | --pst-icon-info-circle: "\f05a"; 96 | --pst-icon-exclamation-triangle: "\f071"; 97 | --pst-icon-exclamation-circle: "\f06a"; 98 | --pst-icon-times-circle: "\f057"; 99 | --pst-icon-lightbulb: "\f0eb"; 100 | 101 | /***************************************************************************** 102 | * Admonitions 103 | **/ 104 | 105 | --pst-color-admonition-default: var(--pst-color-info); 106 | --pst-color-admonition-note: var(--pst-color-info); 107 | --pst-color-admonition-attention: var(--pst-color-warning); 108 | --pst-color-admonition-caution: var(--pst-color-warning); 109 | --pst-color-admonition-warning: var(--pst-color-warning); 110 | --pst-color-admonition-danger: var(--pst-color-danger); 111 | --pst-color-admonition-error: var(--pst-color-danger); 112 | --pst-color-admonition-hint: var(--pst-color-success); 113 | --pst-color-admonition-tip: var(--pst-color-success); 114 | --pst-color-admonition-important: var(--pst-color-success); 115 | 116 | --pst-icon-admonition-default: var(--pst-icon-info-circle); 117 | --pst-icon-admonition-note: var(--pst-icon-info-circle); 118 | --pst-icon-admonition-attention: var(--pst-icon-exclamation-circle); 119 | --pst-icon-admonition-caution: var(--pst-icon-exclamation-triangle); 120 | --pst-icon-admonition-warning: var(--pst-icon-exclamation-triangle); 121 | --pst-icon-admonition-danger: var(--pst-icon-exclamation-triangle); 122 | --pst-icon-admonition-error: var(--pst-icon-times-circle); 123 | --pst-icon-admonition-hint: var(--pst-icon-lightbulb); 124 | --pst-icon-admonition-tip: var(--pst-icon-lightbulb); 125 | --pst-icon-admonition-important: var(--pst-icon-exclamation-circle); 126 | 127 | /***************************************************************************** 128 | * versionmodified 129 | **/ 130 | 131 | --pst-color-versionmodified-default: var(--pst-color-info); 132 | --pst-color-versionmodified-added: var(--pst-color-success); 133 | --pst-color-versionmodified-changed: var(--pst-color-warning); 134 | --pst-color-versionmodified-deprecated: var(--pst-color-danger); 135 | 136 | --pst-icon-versionmodified-default: var(--pst-icon-exclamation-circle); 137 | --pst-icon-versionmodified-added: 
var(--pst-icon-exclamation-circle); 138 | --pst-icon-versionmodified-changed: var(--pst-icon-exclamation-circle); 139 | --pst-icon-versionmodified-deprecated: var(--pst-icon-exclamation-circle); 140 | } 141 | 142 | /* Bio area */ 143 | div.profile-pic { 144 | margin-top: 1em; 145 | } 146 | 147 | div.profile-pic img { 148 | border-radius: 500px; 149 | width: 80%; 150 | max-width: 190px; 151 | margin: 0 auto; 152 | display: block; 153 | } 154 | 155 | .bio-info { 156 | margin: 1em auto; 157 | max-width: 220px; 158 | } 159 | 160 | .name { 161 | font-size: 38px; 162 | } 163 | 164 | .focusareas { 165 | font-size: .9em; 166 | font-weight: bold; 167 | } 168 | 169 | .whatido { 170 | margin-top: 1em; 171 | } 172 | 173 | 174 | /* Sidebar for blog archive / each post */ 175 | ul.ablog-archive { 176 | padding-left: 0px; 177 | } 178 | 179 | .bd-sidebar h2 { 180 | font-size: 1.4em; 181 | } 182 | 183 | .bd-sidebar ul { 184 | padding-left: 0; 185 | list-style-type: none; 186 | } 187 | 188 | .bd-sidebar li { 189 | padding-bottom: .5em; 190 | } 191 | 192 | div.bd-sidebar h3, div.bd-sidebar h2, div.bd-sidebar ul { 193 | padding-left: 5%; 194 | } 195 | 196 | /* In-page post lists */ 197 | ul.postlist { 198 | padding-left: 0; 199 | } 200 | 201 | ul.postlist > li > p:first-child { 202 | font-size: 14px; 203 | } 204 | 205 | ul.postlist li + li { 206 | margin-top: 0em; 207 | } 208 | 209 | ul.postlist li > p > a { 210 | font-style: normal; 211 | font-size: 14px; 212 | } 213 | 214 | 215 | /* Timeline CSS */ 216 | div.timeline div.card { 217 | border: 0px; 218 | } 219 | 220 | div.timeline div.left { 221 | text-align: right; 222 | border-right: 1px solid black; 223 | } 224 | 225 | div.timeline div.entry::after { 226 | width: 1em; 227 | height: 1em; 228 | background: white; 229 | border-radius: 50%; 230 | content: ""; 231 | top: 1em; 232 | display: block; 233 | position: absolute; 234 | border: 1px black solid; 235 | z-index: 999; 236 | } 237 | 238 | div.timeline div.entry.left::after { 239 | right: -.5em; 240 | } 241 | 242 | div.timeline div.entry.right::after { 243 | left: -.5em; 244 | } 245 | 246 | 247 | /* Blog post comments */ 248 | .gsc-reactions { 249 | margin-top: 1em; 250 | } 251 | -------------------------------------------------------------------------------- /docs/source/_static/favicon_io/android-chrome-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/docs/source/_static/favicon_io/android-chrome-192x192.png -------------------------------------------------------------------------------- /docs/source/_static/favicon_io/android-chrome-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/docs/source/_static/favicon_io/android-chrome-512x512.png -------------------------------------------------------------------------------- /docs/source/_static/favicon_io/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/docs/source/_static/favicon_io/apple-touch-icon.png -------------------------------------------------------------------------------- /docs/source/_static/favicon_io/favicon-16x16.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/docs/source/_static/favicon_io/favicon-16x16.png -------------------------------------------------------------------------------- /docs/source/_static/favicon_io/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/docs/source/_static/favicon_io/favicon-32x32.png -------------------------------------------------------------------------------- /docs/source/_static/favicon_io/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/docs/source/_static/favicon_io/favicon.ico -------------------------------------------------------------------------------- /docs/source/_static/favicon_io/site.webmanifest: -------------------------------------------------------------------------------- 1 | {"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"} -------------------------------------------------------------------------------- /docs/source/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/docs/source/_static/logo.png -------------------------------------------------------------------------------- /docs/source/_static/logo1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/docs/source/_static/logo1.png -------------------------------------------------------------------------------- /docs/source/_templates/footer.html: -------------------------------------------------------------------------------- 1 | 48 | -------------------------------------------------------------------------------- /docs/source/_templates/hello.html: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 |
5 |
6 |
7 | 🔉 Pydiogment: Python auDio auGmentation 〰️ 8 |
9 |
10 | -------------------------------------------------------------------------------- /docs/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {%- extends "pydata_sphinx_theme/layout.html" %} 2 | {% block docs_body %} 3 | 4 | 5 | 6 | 16 | 17 | 18 | {{ super() }} 19 | 20 | {% endblock %} 21 | -------------------------------------------------------------------------------- /docs/source/_templates/navbar-nav.html: -------------------------------------------------------------------------------- 1 | 9 | -------------------------------------------------------------------------------- /docs/source/_templates/navbar_version.html: -------------------------------------------------------------------------------- 1 | 2 | TEST: v{{ version }} 3 | -------------------------------------------------------------------------------- /docs/source/_templates/sidebar-nav-bs.html: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 |
5 |
6 |
7 | 🔉 Pydiogment: Python auDio auGmentation 〰️ 8 |
9 |
10 | 11 | 21 | -------------------------------------------------------------------------------- /docs/source/auga.rst: -------------------------------------------------------------------------------- 1 | pydiogment.auga 2 | ================ 3 | 4 | 5 | .. automodule:: pydiogment.auga 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/augf.rst: -------------------------------------------------------------------------------- 1 | pydiogment.augf 2 | ================ 3 | 4 | 5 | .. automodule:: pydiogment.augf 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/augt.rst: -------------------------------------------------------------------------------- 1 | pydiogment.augt 2 | ================ 3 | 4 | 5 | .. automodule:: pydiogment.augt 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/code.rst: -------------------------------------------------------------------------------- 1 | 📄 API Documentation 2 | ===================== 3 | 4 | .. toctree:: 5 | 6 | auga 7 | augf 8 | augt 9 | io 10 | filters 11 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | from datetime import datetime 18 | sys.path.insert(0, os.path.abspath('../..')) 19 | sys.path.insert(0, os.path.abspath('../../pydiogment/')) 20 | 21 | 22 | 23 | # -- Project information ----------------------------------------------------- 24 | 25 | project = '🧠 SuperKogito/pydiogment' 26 | copyright = "2019-%s, Ayoub Malek" % datetime.now().year 27 | author = 'Ayoub Malek' 28 | html_favicon = "_static/favicon_io/favicon.ico" 29 | 30 | # The short X.Y version 31 | version = '' 32 | # The full version, including alpha/beta/rc tags 33 | release = '0.0.1' 34 | 35 | 36 | # -- General configuration --------------------------------------------------- 37 | 38 | # If your documentation needs a minimal Sphinx version, state it here. 39 | # 40 | # needs_sphinx = '1.0' 41 | 42 | # Add any Sphinx extension module names here, as strings. They can be 43 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 44 | # ones. 45 | extensions = [ 46 | 'sphinx.ext.autodoc', 47 | 'sphinx.ext.doctest', 48 | 'sphinx.ext.intersphinx', 49 | 'sphinxcontrib.napoleon', 50 | 'sphinx.ext.todo', 51 | 'sphinx.ext.coverage', 52 | 'sphinx.ext.mathjax', 53 | 'sphinx.ext.ifconfig', 54 | 'sphinx.ext.viewcode', 55 | ] 56 | 57 | # Add any paths that contain templates here, relative to this directory. 58 | templates_path = ['_templates'] 59 | 60 | # The suffix(es) of source filenames. 
61 | # You can specify multiple suffix as a list of string: 62 | # 63 | # source_suffix = ['.rst', '.md'] 64 | source_suffix = '.rst' 65 | 66 | # The master toctree document. 67 | master_doc = 'index' 68 | 69 | # The language for content autogenerated by Sphinx. Refer to documentation 70 | # for a list of supported languages. 71 | # 72 | # This is also used if you do content translation via gettext catalogs. 73 | # Usually you set "language" from the command line for these cases. 74 | language = None 75 | 76 | # List of patterns, relative to source directory, that match files and 77 | # directories to ignore when looking for source files. 78 | # This pattern also affects html_static_path and html_extra_path. 79 | exclude_patterns = [] 80 | 81 | # The name of the Pygments (syntax highlighting) style to use. 82 | pygments_style = None 83 | 84 | 85 | # -- Options for HTML output ------------------------------------------------- 86 | 87 | # The theme to use for HTML and HTML Help pages. See the documentation for 88 | # a list of builtin themes. 89 | html_theme = "pydata_sphinx_theme" 90 | 91 | html_theme_options = { 92 | "github_url": "https://github.com/superkogito/", 93 | "search_bar_text": "Search this site...", 94 | "google_analytics_id": "UA-133660046-1", 95 | 96 | "navbar_start": ["navbar-logo"], 97 | "navbar_center": ["navbar-nav"], 98 | "navbar_end": ["search-field.html", "version-switcher", "navbar-icon-links"], 99 | 100 | "switcher": { 101 | "json_url": "https://pandas.pydata.org/versions.json", 102 | "url_template": "https://pandas.pydata.org/{version}/", 103 | }, 104 | 105 | "external_links": [ 106 | {"name": "Home", "url": "https://superkogito.github.io/index.html"}, 107 | {"name": "Projects", "url": "https://superkogito.github.io/projects.html"}, 108 | {"name": "Blog", "url": "https://superkogito.github.io/blog.html"}, 109 | {"name": "About Me", "url": "https://superkogito.github.io/about.html"} 110 | ], 111 | } 112 | 113 | html_sidebars = { 114 | "index.html": ["sidebar-nav-bs.html"], 115 | "**": ["sidebar-nav-bs.html", ] 116 | } 117 | 118 | 119 | blog_baseurl = "https://superkogito.github.io" 120 | blog_title = "SuperKogito" 121 | blog_path = "blog" 122 | fontawesome_included = True 123 | blog_post_pattern = "blog/*/*" 124 | post_redirect_refresh = 1 125 | post_auto_image = 0 126 | post_auto_excerpt = 1 127 | 128 | # Theme options are theme-specific and customize the look and feel of a theme 129 | # further. For a list of options available for each theme, see the 130 | # documentation. 131 | # 132 | # html_theme_options = {} 133 | 134 | # Add any paths that contain custom static files (such as style sheets) here, 135 | # relative to this directory. They are copied after the builtin static files, 136 | # so a file named "default.css" will overwrite the builtin "default.css". 137 | html_static_path = ['_static'] 138 | html_css_files = [ 139 | "css/custom.css", 140 | "css/tree_graph.css", 141 | "css/social_media_sharing.css", 142 | ] 143 | 144 | # Custom sidebar templates, must be a dictionary that maps document names 145 | # to template names. 146 | # 147 | # The default sidebars (for documents that don't match any pattern) are 148 | # defined by theme itself. Builtin themes are using these templates by 149 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 150 | # 'searchbox.html']``. 
151 | # 152 | # html_sidebars = {} 153 | 154 | 155 | # -- Options for HTMLHelp output --------------------------------------------- 156 | 157 | # Output file base name for HTML help builder. 158 | htmlhelp_basename = 'pydiogmentdoc' 159 | 160 | 161 | # -- Options for LaTeX output ------------------------------------------------ 162 | 163 | latex_elements = { 164 | # The paper size ('letterpaper' or 'a4paper'). 165 | # 166 | # 'papersize': 'letterpaper', 167 | 168 | # The font size ('10pt', '11pt' or '12pt'). 169 | # 170 | # 'pointsize': '10pt', 171 | 172 | # Additional stuff for the LaTeX preamble. 173 | # 174 | # 'preamble': '', 175 | 176 | # Latex figure (float) alignment 177 | # 178 | # 'figure_align': 'htbp', 179 | } 180 | 181 | # Grouping the document tree into LaTeX files. List of tuples 182 | # (source start file, target name, title, 183 | # author, documentclass [howto, manual, or own class]). 184 | latex_documents = [ 185 | (master_doc, 'pydiogment.tex', 'pydiogment Documentation', 186 | 'Ayoub Malek', 'manual'), 187 | ] 188 | 189 | 190 | # -- Options for manual page output ------------------------------------------ 191 | 192 | # One entry per manual page. List of tuples 193 | # (source start file, name, description, authors, manual section). 194 | man_pages = [ 195 | (master_doc, 'pydiogment', 'pydiogment Documentation', 196 | [author], 1) 197 | ] 198 | 199 | 200 | # -- Options for Texinfo output ---------------------------------------------- 201 | 202 | # Grouping the document tree into Texinfo files. List of tuples 203 | # (source start file, target name, title, author, 204 | # dir menu entry, description, category) 205 | texinfo_documents = [ 206 | (master_doc, 'pydiogment', 'pydiogment Documentation', 207 | author, 'pydiogment', 'One line description of project.', 208 | 'Miscellaneous'), 209 | ] 210 | 211 | 212 | # -- Options for Epub output ------------------------------------------------- 213 | 214 | # Bibliographic Dublin Core info. 215 | epub_title = project 216 | 217 | # The unique identifier of the text. This can be a ISBN number 218 | # or the project homepage. 219 | # 220 | # epub_identifier = '' 221 | 222 | # A unique identification for the text. 223 | # 224 | # epub_uid = '' 225 | 226 | # A list of files that should not be packed into the epub file. 227 | epub_exclude_files = ['search.html'] 228 | 229 | 230 | # -- Extension configuration ------------------------------------------------- 231 | 232 | # -- Options for intersphinx extension --------------------------------------- 233 | 234 | # Example configuration for intersphinx: refer to the Python standard library. 235 | # intersphinx 236 | intersphinx_mapping = { 237 | "numpy": ("https://numpy.org/doc/stable/", None), 238 | "python": ("https://docs.python.org/", None), 239 | "scipy": ("https://docs.scipy.org/doc/scipy/", None), 240 | } 241 | 242 | # -- Options for todo extension ---------------------------------------------- 243 | 244 | # If true, `todo` and `todoList` produce output, else they produce nothing. 245 | todo_include_todos = True 246 | -------------------------------------------------------------------------------- /docs/source/filters.rst: -------------------------------------------------------------------------------- 1 | pydiogment.utils.filters 2 | ======================== 3 | 4 | 5 | .. 
automodule:: pydiogment.utils.filters
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 | 
-------------------------------------------------------------------------------- /docs/source/index.rst: --------------------------------------------------------------------------------
1 | .. pydiogment documentation master file, created by
2 | sphinx-quickstart on Mon Dec 9 14:08:11 2019.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 | 
6 | Welcome to pydiogment's documentation!
7 | ======================================
8 | 
9 | .. image:: _static/logo1.png
10 | :scale: 75 %
11 | :align: center
12 | 
13 | 
14 | Pydiogment (Python audio augmentation) aims to simplify audio augmentation.
15 | It generates multiple audio files based on a starting mono audio file.
16 | The library can generate sped-up and slowed-down versions of the input, copies with different tones, and more.
17 | 
18 | 
19 | Dependencies
20 | ------------
21 | 
22 | pydiogment is built using Python3_ and it requires the following:
23 | 
24 | - Python packages:
25 | - NumPy_ (>= 1.17.2) : ``pip install numpy``
26 | - SciPy_ (>= 1.3.1) : ``pip install scipy``
27 | 
28 | Or you can simply install everything from the requirements file with ``pip install -r requirements.txt``
29 | 
30 | - Pydiogment also requires FFmpeg_, which you can install using: ``sudo apt install ffmpeg``
31 | 
32 | 
33 | Installation
34 | ------------
35 | 
36 | If you already have a working installation of NumPy and SciPy, you can simply install pydiogment using pip:
37 | 
38 | ``pip install pydiogment``
39 | 
40 | To update an existing pydiogment version use:
41 | 
42 | ``pip install -U pydiogment``
43 | 
44 | Documentation
45 | -------------
46 | 
47 | .. toctree::
48 | 
49 | code
50 | 
51 | 
52 | Citation
53 | --------
54 | 
55 | @software{ayoubmalek2020,
56 | author = {Ayoub Malek},
57 | title = {pydiogment/pydiogment: 0.1.0},
58 | month = Apr,
59 | year = 2020,
60 | version = {0.1.2},
61 | url = {https://github.com/SuperKogito/pydiogment}
62 | }
63 | 
64 | 
65 | .. _Python3 : https://www.python.org/download/releases/3.0/
66 | .. _NumPy : https://numpy.org/
67 | .. _SciPy : https://scipy.org/
68 | .. _FFmpeg : https://www.ffmpeg.org/
69 | 
-------------------------------------------------------------------------------- /docs/source/io.rst: --------------------------------------------------------------------------------
1 | pydiogment.utils.io
2 | ===================
3 | 
4 | 
5 | ..
automodule:: pydiogment.utils.io 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /language.json: -------------------------------------------------------------------------------- 1 | { 2 | "knownWords": [ 3 | "Butterworth", 4 | "FFmpeg", 5 | "Pydiogment", 6 | "Pytorch", 7 | "Resample", 8 | "Scipy", 9 | "Specaugment", 10 | "convolve", 11 | "muda", 12 | "resampled", 13 | "spectograms" 14 | ] 15 | } -------------------------------------------------------------------------------- /paper/Makefile: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # @file Makefile 3 | # @brief Makefile for generating previews of the paper 4 | # @author Michael Hucka 5 | # @license Please see the file named LICENSE in the project directory 6 | # @website https://github.com/casics/dassie 7 | # ============================================================================= 8 | 9 | # Change the following values to match your configuration. 10 | # ............................................................................. 11 | 12 | assets := assets 13 | input := paper.md 14 | output := paper.pdf 15 | bib := paper.bib 16 | bibstyle := apa.csl 17 | 18 | # Main code -- no more customization variables after this point 19 | # ............................................................................. 20 | 21 | title := $(shell grep title: $(input) | sed 's/title: *//' | tr -d "'") 22 | authors := $(shell sed -n '/authors:/,/affiliations:/p' $(input) | grep name: | sed 's/- name: *//' | paste -d, -s - | sed 's/,/, /g') 23 | repo := $(shell git remote get-url origin | sed 's|git@github.com:|https://github.com/|' | sed 's/\.git//') 24 | 25 | $(output): $(input) $(bib) Makefile 26 | pandoc \ 27 | --from markdown+autolink_bare_uris+implicit_figures \ 28 | -V paper_title="$(title)" \ 29 | -V citation_author="$(authors)" \ 30 | -V repository="$(repo)" \ 31 | -V archive_doi="http://dx.doi.org/00.00000/zenodo.0000000" \ 32 | -V formatted_doi="00.00000/joss.00000" \ 33 | -V paper_url="http://joss.theoj.org/papers/" \ 34 | -V review_issue_url="http://joss.theoj.org/papers/" \ 35 | -V issue="0" \ 36 | -V volume="00" \ 37 | -V year="2020" \ 38 | -V submitted="25 February 2020" \ 39 | -V published="25 February 2020" \ 40 | -V page="00" \ 41 | -V graphics="true" \ 42 | -V geometry:margin=1in \ 43 | -V logo_path=$(assets)/logo.png \ 44 | --filter pandoc-citeproc --csl $(assets)/$(bibstyle) \ 45 | --template $(assets)/latex.template \ 46 | $< -o $@ \ 47 | 48 | clean: 49 | rm -rf *.pdf 50 | -------------------------------------------------------------------------------- /paper/README.md: -------------------------------------------------------------------------------- 1 | # Pydiogment-paper 2 | To build this paper offline, use the `make` command. 3 | For the building requirements, please refer to [JOSS Requirements](https://joss.readthedocs.io/en/latest/submitting.html). 
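
For instance (a sketch; it assumes pandoc, pandoc-citeproc, and a LaTeX toolchain are installed, as the Makefile above requires):

```bash
cd paper
make        # runs the pandoc command from the Makefile and renders paper.md to paper.pdf
make clean  # removes the generated PDF
```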
4 | -------------------------------------------------------------------------------- /paper/assets/apa.csl: -------------------------------------------------------------------------------- 1 | 2 | 444 | -------------------------------------------------------------------------------- /paper/assets/latex.template: -------------------------------------------------------------------------------- 1 | \documentclass[10pt,a4paper,onecolumn]{article} 2 | \usepackage{marginnote} 3 | \usepackage{graphicx} 4 | \usepackage{xcolor} 5 | \usepackage{authblk,etoolbox} 6 | \usepackage{titlesec} 7 | \usepackage{calc} 8 | \usepackage{tikz} 9 | \usepackage{hyperref} 10 | \hypersetup{colorlinks,breaklinks, 11 | urlcolor=[rgb]{0.0, 0.5, 1.0}, 12 | linkcolor=[rgb]{0.0, 0.5, 1.0}} 13 | \usepackage{caption} 14 | \usepackage{tcolorbox} 15 | \usepackage{amssymb,amsmath} 16 | \usepackage{ifxetex,ifluatex} 17 | \usepackage{seqsplit} 18 | \usepackage{xstring} 19 | 20 | \usepackage{fixltx2e} % provides \textsubscript 21 | \usepackage[ 22 | backend=biber, 23 | % style=alphabetic, 24 | % citestyle=numeric 25 | ]{biblatex} 26 | \bibliography{$bibliography$} 27 | 28 | % --- Splitting \texttt -------------------------------------------------- 29 | 30 | \let\textttOrig=\texttt 31 | \def\texttt#1{\expandafter\textttOrig{\seqsplit{#1}}} 32 | \renewcommand{\seqinsert}{\ifmmode 33 | \allowbreak 34 | \else\penalty6000\hspace{0pt plus 0.02em}\fi} 35 | 36 | 37 | % --- Pandoc does not distinguish between links like [foo](bar) and 38 | % --- [foo](foo) -- a simplistic Markdown model. However, this is 39 | % --- wrong: in links like [foo](foo) the text is the url, and must 40 | % --- be split correspondingly. 41 | % --- Here we detect links \href{foo}{foo}, and also links starting 42 | % --- with https://doi.org, and use path-like splitting (but not 43 | % --- escaping!) with these links. 44 | % --- Another vile thing pandoc does is the different escaping of 45 | % --- foo and bar. This may confound our detection. 46 | % --- This problem we do not try to solve at present, with the exception 47 | % --- of doi-like urls, which we detect correctly. 
48 | 49 | 50 | \makeatletter 51 | \let\href@Orig=\href 52 | \def\href@Urllike#1#2{\href@Orig{#1}{\begingroup 53 | \def\Url@String{#2}\Url@FormatString 54 | \endgroup}} 55 | \def\href@Notdoi#1#2{\def\tempa{#1}\def\tempb{#2}% 56 | \ifx\tempa\tempb\relax\href@Urllike{#1}{#2}\else 57 | \href@Orig{#1}{#2}\fi} 58 | \def\href#1#2{% 59 | \IfBeginWith{#1}{https://doi.org}% 60 | {\href@Urllike{#1}{#2}}{\href@Notdoi{#1}{#2}}} 61 | \makeatother 62 | 63 | 64 | % --- Page layout ------------------------------------------------------------- 65 | \usepackage[top=3.5cm, bottom=3cm, right=1.5cm, left=1.0cm, 66 | headheight=2.2cm, reversemp, includemp, marginparwidth=4.5cm]{geometry} 67 | 68 | % --- Default font ------------------------------------------------------------ 69 | % \renewcommand\familydefault{\sfdefault} 70 | 71 | % --- Style ------------------------------------------------------------------- 72 | \renewcommand{\bibfont}{\small \sffamily} 73 | \renewcommand{\captionfont}{\small\sffamily} 74 | \renewcommand{\captionlabelfont}{\bfseries} 75 | 76 | % --- Section/SubSection/SubSubSection ---------------------------------------- 77 | \titleformat{\section} 78 | {\normalfont\sffamily\Large\bfseries} 79 | {}{0pt}{} 80 | \titleformat{\subsection} 81 | {\normalfont\sffamily\large\bfseries} 82 | {}{0pt}{} 83 | \titleformat{\subsubsection} 84 | {\normalfont\sffamily\bfseries} 85 | {}{0pt}{} 86 | \titleformat*{\paragraph} 87 | {\sffamily\normalsize} 88 | 89 | 90 | % --- Header / Footer --------------------------------------------------------- 91 | \usepackage{fancyhdr} 92 | \pagestyle{fancy} 93 | \fancyhf{} 94 | %\renewcommand{\headrulewidth}{0.50pt} 95 | \renewcommand{\headrulewidth}{0pt} 96 | \fancyhead[L]{\hspace{-0.75cm}\includegraphics[width=5.5cm]{$logo_path$}} 97 | \fancyhead[C]{} 98 | \fancyhead[R]{} 99 | \renewcommand{\footrulewidth}{0.25pt} 100 | 101 | \fancyfoot[L]{\parbox[t]{0.98\headwidth}{\footnotesize{\sffamily $citation_author$, ($year$). $footnote_paper_title$. \textit{$journal_name$}, $volume$($issue$), $page$. 
\url{https://doi.org/$formatted_doi$}}}} 102 | 103 | 104 | \fancyfoot[R]{\sffamily \thepage} 105 | \makeatletter 106 | \let\ps@plain\ps@fancy 107 | \fancyheadoffset[L]{4.5cm} 108 | \fancyfootoffset[L]{4.5cm} 109 | 110 | % --- Macros --------- 111 | 112 | \definecolor{linky}{rgb}{0.0, 0.5, 1.0} 113 | 114 | \newtcolorbox{repobox} 115 | {colback=red, colframe=red!75!black, 116 | boxrule=0.5pt, arc=2pt, left=6pt, right=6pt, top=3pt, bottom=3pt} 117 | 118 | \newcommand{\ExternalLink}{% 119 | \tikz[x=1.2ex, y=1.2ex, baseline=-0.05ex]{% 120 | \begin{scope}[x=1ex, y=1ex] 121 | \clip (-0.1,-0.1) 122 | --++ (-0, 1.2) 123 | --++ (0.6, 0) 124 | --++ (0, -0.6) 125 | --++ (0.6, 0) 126 | --++ (0, -1); 127 | \path[draw, 128 | line width = 0.5, 129 | rounded corners=0.5] 130 | (0,0) rectangle (1,1); 131 | \end{scope} 132 | \path[draw, line width = 0.5] (0.5, 0.5) 133 | -- (1, 1); 134 | \path[draw, line width = 0.5] (0.6, 1) 135 | -- (1, 1) -- (1, 0.6); 136 | } 137 | } 138 | 139 | % --- Title / Authors --------------------------------------------------------- 140 | % patch \maketitle so that it doesn't center 141 | \patchcmd{\@maketitle}{center}{flushleft}{}{} 142 | \patchcmd{\@maketitle}{center}{flushleft}{}{} 143 | % patch \maketitle so that the font size for the title is normal 144 | \patchcmd{\@maketitle}{\LARGE}{\LARGE\sffamily}{}{} 145 | % patch the patch by authblk so that the author block is flush left 146 | \def\maketitle{{% 147 | \renewenvironment{tabular}[2][] 148 | {\begin{flushleft}} 149 | {\end{flushleft}} 150 | \AB@maketitle}} 151 | \makeatletter 152 | \renewcommand\AB@affilsepx{ \protect\Affilfont} 153 | %\renewcommand\AB@affilnote[1]{{\bfseries #1}\hspace{2pt}} 154 | \renewcommand\AB@affilnote[1]{{\bfseries #1}\hspace{3pt}} 155 | \renewcommand{\affil}[2][]% 156 | {\newaffiltrue\let\AB@blk@and\AB@pand 157 | \if\relax#1\relax\def\AB@note{\AB@thenote}\else\def\AB@note{#1}% 158 | \setcounter{Maxaffil}{0}\fi 159 | \begingroup 160 | \let\href=\href@Orig 161 | \let\texttt=\textttOrig 162 | \let\protect\@unexpandable@protect 163 | \def\thanks{\protect\thanks}\def\footnote{\protect\footnote}% 164 | \@temptokena=\expandafter{\AB@authors}% 165 | {\def\\{\protect\\\protect\Affilfont}\xdef\AB@temp{#2}}% 166 | \xdef\AB@authors{\the\@temptokena\AB@las\AB@au@str 167 | \protect\\[\affilsep]\protect\Affilfont\AB@temp}% 168 | \gdef\AB@las{}\gdef\AB@au@str{}% 169 | {\def\\{, \ignorespaces}\xdef\AB@temp{#2}}% 170 | \@temptokena=\expandafter{\AB@affillist}% 171 | \xdef\AB@affillist{\the\@temptokena \AB@affilsep 172 | \AB@affilnote{\AB@note}\protect\Affilfont\AB@temp}% 173 | \endgroup 174 | \let\AB@affilsep\AB@affilsepx 175 | } 176 | \makeatother 177 | \renewcommand\Authfont{\sffamily\bfseries} 178 | \renewcommand\Affilfont{\sffamily\small\mdseries} 179 | \setlength{\affilsep}{1em} 180 | 181 | 182 | \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex 183 | \usepackage[$if(fontenc)$$fontenc$$else$T1$endif$]{fontenc} 184 | \usepackage[utf8]{inputenc} 185 | 186 | \else % if luatex or xelatex 187 | \ifxetex 188 | \usepackage{mathspec} 189 | \else 190 | \usepackage{fontspec} 191 | \fi 192 | \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase} 193 | 194 | \fi 195 | % use upquote if available, for straight quotes in verbatim environments 196 | \IfFileExists{upquote.sty}{\usepackage{upquote}}{} 197 | % use microtype if available 198 | \IfFileExists{microtype.sty}{% 199 | \usepackage{microtype} 200 | \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts 201 | }{} 202 | 203 | \usepackage{hyperref} 204 
| $if(colorlinks)$ 205 | \PassOptionsToPackage{usenames,dvipsnames}{color} % color is loaded by hyperref 206 | $endif$ 207 | \hypersetup{unicode=true, 208 | $if(title-meta)$ 209 | pdftitle={$title-meta$}, 210 | $endif$ 211 | $if(author-meta)$ 212 | pdfauthor={$author-meta$}, 213 | $endif$ 214 | $if(keywords)$ 215 | pdfkeywords={$for(keywords)$$keywords$$sep$; $endfor$}, 216 | $endif$ 217 | $if(colorlinks)$ 218 | colorlinks=true, 219 | linkcolor=$if(linkcolor)$$linkcolor$$else$Maroon$endif$, 220 | citecolor=$if(citecolor)$$citecolor$$else$Blue$endif$, 221 | urlcolor=$if(urlcolor)$$urlcolor$$else$Blue$endif$, 222 | $else$ 223 | pdfborder={0 0 0}, 224 | $endif$ 225 | breaklinks=true} 226 | \urlstyle{same} % don't use monospace font for urls 227 | $if(lang)$ 228 | \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex 229 | \usepackage[shorthands=off,$for(babel-otherlangs)$$babel-otherlangs$,$endfor$main=$babel-lang$]{babel} 230 | $if(babel-newcommands)$ 231 | $babel-newcommands$ 232 | $endif$ 233 | \else 234 | \usepackage{polyglossia} 235 | \setmainlanguage[$polyglossia-lang.options$]{$polyglossia-lang.name$} 236 | $for(polyglossia-otherlangs)$ 237 | \setotherlanguage[$polyglossia-otherlangs.options$]{$polyglossia-otherlangs.name$} 238 | $endfor$ 239 | \fi 240 | $endif$ 241 | $if(natbib)$ 242 | \usepackage{natbib} 243 | \bibliographystyle{$if(biblio-style)$$biblio-style$$else$plainnat$endif$} 244 | $endif$ 245 | $if(biblatex)$ 246 | \usepackage$if(biblio-style)$[style=$biblio-style$]$endif${biblatex} 247 | $if(biblatexoptions)$\ExecuteBibliographyOptions{$for(biblatexoptions)$$biblatexoptions$$sep$,$endfor$}$endif$ 248 | $for(bibliography)$ 249 | \addbibresource{$bibliography$} 250 | $endfor$ 251 | $endif$ 252 | $if(csl-refs)$ 253 | \newlength{\cslhangindent} % set up new length 254 | \setlength{\cslhangindent}{$if(csl-hanging-indent)$2em$else$0em$endif$} 255 | \newenvironment{cslreferences}% 256 | {\everypar{\setlength{\hangindent}{\cslhangindent}}}% 257 | {\par} % by default, this env does not change anything 258 | $endif$ 259 | $if(listings)$ 260 | \usepackage{listings} 261 | $endif$ 262 | $if(lhs)$ 263 | \lstnewenvironment{code}{\lstset{language=Haskell,basicstyle=\small\ttfamily}}{} 264 | $endif$ 265 | $if(highlighting-macros)$ 266 | $highlighting-macros$ 267 | $endif$ 268 | $if(verbatim-in-note)$ 269 | \usepackage{fancyvrb} 270 | \VerbatimFootnotes % allows verbatim text in footnotes 271 | $endif$ 272 | $if(tables)$ 273 | \usepackage{longtable,booktabs} 274 | $endif$ 275 | 276 | % --- We redefined \texttt, but in sections and captions we want the 277 | % --- old definition 278 | \let\addcontentslineOrig=\addcontentsline 279 | \def\addcontentsline#1#2#3{\bgroup 280 | \let\texttt=\textttOrig\addcontentslineOrig{#1}{#2}{#3}\egroup} 281 | \let\markbothOrig\markboth 282 | \def\markboth#1#2{\bgroup 283 | \let\texttt=\textttOrig\markbothOrig{#1}{#2}\egroup} 284 | \let\markrightOrig\markright 285 | \def\markright#1{\bgroup 286 | \let\texttt=\textttOrig\markrightOrig{#1}\egroup} 287 | 288 | 289 | $if(graphics)$ 290 | \usepackage{graphicx,grffile} 291 | \makeatletter 292 | \def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi} 293 | \def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi} 294 | \makeatother 295 | % Scale images if necessary, so that they will not overflow the page 296 | % margins by default, and it is still possible to overwrite the defaults 297 | % using explicit options in \includegraphics[width, height, ...]{} 298 | 
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio} 299 | $endif$ 300 | $if(links-as-notes)$ 301 | % Make links footnotes instead of hotlinks: 302 | \renewcommand{\href}[2]{#2\footnote{\url{#1}}} 303 | $endif$ 304 | $if(strikeout)$ 305 | \usepackage[normalem]{ulem} 306 | % avoid problems with \sout in headers with hyperref: 307 | \pdfstringdefDisableCommands{\renewcommand{\sout}{}} 308 | $endif$ 309 | $if(indent)$ 310 | $else$ 311 | \IfFileExists{parskip.sty}{% 312 | \usepackage{parskip} 313 | }{% else 314 | \setlength{\parindent}{0pt} 315 | \setlength{\parskip}{6pt plus 2pt minus 1pt} 316 | } 317 | $endif$ 318 | \setlength{\emergencystretch}{3em} % prevent overfull lines 319 | \providecommand{\tightlist}{% 320 | \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} 321 | $if(numbersections)$ 322 | \setcounter{secnumdepth}{5} 323 | $else$ 324 | \setcounter{secnumdepth}{0} 325 | $endif$ 326 | $if(subparagraph)$ 327 | $else$ 328 | % Redefines (sub)paragraphs to behave more like sections 329 | \ifx\paragraph\undefined\else 330 | \let\oldparagraph\paragraph 331 | \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}} 332 | \fi 333 | \ifx\subparagraph\undefined\else 334 | \let\oldsubparagraph\subparagraph 335 | \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}} 336 | \fi 337 | $endif$ 338 | $if(dir)$ 339 | \ifxetex 340 | % load bidi as late as possible as it modifies e.g. graphicx 341 | $if(latex-dir-rtl)$ 342 | \usepackage[RTLdocument]{bidi} 343 | $else$ 344 | \usepackage{bidi} 345 | $endif$ 346 | \fi 347 | \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex 348 | \TeXXeTstate=1 349 | \newcommand{\RL}[1]{\beginR #1\endR} 350 | \newcommand{\LR}[1]{\beginL #1\endL} 351 | \newenvironment{RTL}{\beginR}{\endR} 352 | \newenvironment{LTR}{\beginL}{\endL} 353 | \fi 354 | $endif$ 355 | $for(header-includes)$ 356 | $header-includes$ 357 | $endfor$ 358 | 359 | $if(title)$ 360 | \title{$title$$if(thanks)$\thanks{$thanks$}$endif$} 361 | $endif$ 362 | $if(subtitle)$ 363 | \providecommand{\subtitle}[1]{} 364 | \subtitle{$subtitle$} 365 | $endif$ 366 | 367 | $if(authors)$ 368 | $for(authors)$ 369 | $if(authors.affiliation)$ 370 | \author[$authors.affiliation$]{$authors.name$} 371 | $else$ 372 | \author{$authors.name$} 373 | $endif$ 374 | $endfor$ 375 | $endif$ 376 | 377 | $if(affiliations)$ 378 | $for(affiliations)$ 379 | \affil[$affiliations.index$]{$affiliations.name$} 380 | $endfor$ 381 | $endif$ 382 | \date{\vspace{-5ex}} 383 | 384 | \begin{document} 385 | $if(title)$ 386 | \maketitle 387 | $endif$ 388 | $if(abstract)$ 389 | \begin{abstract} 390 | $abstract$ 391 | \end{abstract} 392 | $endif$ 393 | 394 | \marginpar{ 395 | %\hrule 396 | \sffamily\small 397 | 398 | {\bfseries DOI:} \href{https://doi.org/$formatted_doi$}{\color{linky}{$formatted_doi$}} 399 | 400 | \vspace{2mm} 401 | 402 | {\bfseries Software} 403 | \begin{itemize} 404 | \setlength\itemsep{0em} 405 | \item \href{$review_issue_url$}{\color{linky}{Review}} \ExternalLink 406 | \item \href{$repository$}{\color{linky}{Repository}} \ExternalLink 407 | \item \href{$archive_doi$}{\color{linky}{Archive}} \ExternalLink 408 | \end{itemize} 409 | 410 | \vspace{2mm} 411 | 412 | {\bfseries Submitted:} $submitted$\\ 413 | {\bfseries Published:} $published$ 414 | 415 | \vspace{2mm} 416 | {\bfseries License}\\ 417 | Authors of papers retain copyright and release the work under a Creative Commons Attribution 4.0 International License (\href{http://creativecommons.org/licenses/by/4.0/}{\color{linky}{CC-BY}}). 
418 | } 419 | 420 | $for(include-before)$ 421 | $include-before$ 422 | 423 | $endfor$ 424 | $if(toc)$ 425 | { 426 | $if(colorlinks)$ 427 | \hypersetup{linkcolor=$if(toccolor)$$toccolor$$else$black$endif$} 428 | $endif$ 429 | \setcounter{tocdepth}{$toc-depth$} 430 | \tableofcontents 431 | } 432 | $endif$ 433 | $if(lot)$ 434 | \listoftables 435 | $endif$ 436 | $if(lof)$ 437 | \listoffigures 438 | $endif$ 439 | $body$ 440 | 441 | $if(natbib)$ 442 | $if(bibliography)$ 443 | $if(biblio-title)$ 444 | $if(book-class)$ 445 | \renewcommand\bibname{$biblio-title$} 446 | $else$ 447 | \renewcommand\refname{$biblio-title$} 448 | $endif$ 449 | $endif$ 450 | \bibliography{$for(bibliography)$$bibliography$$sep$,$endfor$} 451 | $endif$ 452 | $endif$ 453 | $if(biblatex)$ 454 | \printbibliography$if(biblio-title)$[title=$biblio-title$]$endif$ 455 | $endif$ 456 | $for(include-after)$ 457 | $include-after$ 458 | 459 | $endfor$ 460 | \end{document} -------------------------------------------------------------------------------- /paper/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/paper/assets/logo.png -------------------------------------------------------------------------------- /paper/experiment/README.md: -------------------------------------------------------------------------------- 1 | ## Motivation 2 | - To demonstrate the utility of `Pydiogment` and of the implemented augmentation techniques, we show their effect on a **spoken emotions recognition task**. 3 | 4 | ## Spoken Emotions Recognition Experiment 5 | 6 | ### Description 7 | We use the [Emo-DB](http://emodb.bilderbar.info/index-1280.html) as a starting point, which is a small German audio data-set simulating 7 different emotions (neutral, sadness, anger, boredom, fear, happiness, disgust). We choose the [Mel-Frequency Cepstral Coefficients (MFCCs)](https://en.wikipedia.org/wiki/Mel-frequency_cepstrum) as the characterizing low-level audio features, due to their previously proven success on similar problems. The features are extracted using the [python_speech_features](https://github.com/jameslyons/python_speech_features) library. 8 | 9 | 10 | In a first phase and using the [scikit-learn library](https://scikit-learn.org/stable/), we apply various recognition algorithms to the original data, such as K-Nearest Neighbors (KNN), random forests, decision trees, Support Vector Machines (SVM), etc. 11 | In a second phase, we augment the data using `Pydiogment` by applying the following techniques: 12 | 13 | - slow down samples using a coefficient of 0.8. 14 | - speed up samples using a coefficient of 1.2. 15 | - randomly crop samples with a minimum length of 1 second. 16 | - add noise with an SNR = 10. 17 | - add a fade in and fade out effect. 18 | - apply gain of -100 dB. 19 | - apply gain of -50 dB. 20 | - convolve with a noise file using a level = 10**-2.75. 21 | - shift time by one second (1 sec) to the right (direction = right). 22 | - shift time by one second (1 sec) to the left (direction = left). 23 | - change tone with a tone coefficient equal to 0.9. 24 | - change tone with a tone coefficient equal to 1.1. 25 | 26 | The results are under `results.txt`. 27 | 28 | 29 | ### How to replicate 30 | To replicate the experiment, please follow these instructions: 31 | 1. To get the data: `python3 _1_get_data.py` 32 | 2. To generate augmented data: `python3 _2_gen_augmentations.py` 33 | 3. 
To extract the features: `python3 _3_get_features.py` 34 | 4. To run the experiment: `python3 _4_run_experiment.py` 35 | 36 | To pass the experiment's output to a results file use: 37 | `python3 _4_run_experiment.py > results.txt` 38 | -------------------------------------------------------------------------------- /paper/experiment/_1_get_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import io 3 | import os 4 | import shutil 5 | import zipfile 6 | import requests 7 | import subprocess 8 | 9 | 10 | def download_and_extract_zip(zip_url): 11 | """ 12 | Download and extract the data .zip file. 13 | """ 14 | r = requests.get(zip_url) 15 | z = zipfile.ZipFile(io.BytesIO(r.content)) 16 | z.extractall() 17 | 18 | 19 | def manage(waves_folder, data_folder): 20 | """ 21 | Organise the emotion folders. 22 | """ 23 | # define emotions list 24 | emotions_list = {"W": "anger", "L": "boredom", "E": "disgust", 25 | "A": "fear", "F": "happiness", "T": "sadness", 26 | "N": "neutral"} 27 | 28 | # create emotions folders 29 | for emotion in emotions_list.values(): 30 | # create a folder for the data 31 | try: 32 | os.mkdir(data_folder + emotion) 33 | print(data_folder + emotion, "was created ...") 34 | 35 | 36 | except BaseException: 37 | print("Exception raised: ", data_folder + emotion, 38 | "could not be created ...") 39 | 40 | # get all filenames 41 | file_paths = [waves_folder + f for f in os.listdir(waves_folder)] 42 | 43 | for file_path in file_paths: 44 | file_name = file_path.split("/")[-1] 45 | emotion = emotions_list[file_name[-6]] + "/" 46 | # copy file to its associated destination 47 | shutil.copyfile(file_path, data_folder + emotion + file_name) 48 | 49 | 50 | def prepare_data(zip_url="http://emodb.bilderbar.info/download/download.zip"): 51 | """ 52 | Prepare the data by cleaning unneeded folders. 53 | """ 54 | # download and extract data 55 | download_and_extract_zip(zip_url) 56 | 57 | # remove unneeded folders and files 58 | _ = subprocess.Popen(["rm", "-rf", "lablaut", "labsilb", "silb"], 59 | stdin=subprocess.PIPE, 60 | stdout=subprocess.PIPE, 61 | stderr=subprocess.PIPE) 62 | _ = subprocess.Popen(["rm", "-f", "erklaerung.txt", "erkennung.txt"], 63 | stdin=subprocess.PIPE, 64 | stdout=subprocess.PIPE, 65 | stderr=subprocess.PIPE) 66 | # organize data 67 | if not os.path.exists("data/waves/"): 68 | os.makedirs("data/waves/") 69 | 70 | # manage 71 | manage("wav/", "data/waves/") 72 | _ = subprocess.Popen(["rm", "-rf", "wav"], stdin=subprocess.PIPE, 73 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 74 | 75 | 76 | if __name__ == "__main__": 77 | prepare_data() 78 | -------------------------------------------------------------------------------- /paper/experiment/_2_gen_augmentations.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pydiogment import auga, augf, augt 3 | 4 | 5 | def augment_file(test_file): 6 | """ 7 | Generate audio augmentations from one file.
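Each augmentation writes a new, suffixed wave file next to the input; every call is wrapped in try/except so that a failing augmentation is only logged and skipped instead of aborting the whole batch.

Args:
    test_file (str): path to the input wave file.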
8 | """ 9 | try: 10 | augt.slowdown(test_file, coefficient=0.8) 11 | except Exception: 12 | print("cannot %10s for %20s" % ("slowdown", test_file)) 13 | 14 | try: 15 | augt.speed(test_file, coefficient=1.2) 16 | except Exception: 17 | print("cannot %10s for %20s" % ("speed", test_file)) 18 | 19 | try: 20 | augt.random_cropping(test_file, 1) 21 | except Exception: 22 | print("cannot %10s for %20s" % ("random_cropping", test_file)) 23 | 24 | try: 25 | augt.shift_time(test_file, 1, "right") 26 | except Exception: 27 | print("cannot %10s for %20s" % ("shift_time", test_file)) 28 | 29 | try: 30 | augt.shift_time(test_file, 1, "left") 31 | except Exception: 32 | print("cannot %10s for %20s" % ("shift_time", test_file)) 33 | 34 | try: 35 | auga.add_noise(test_file, 10) 36 | except Exception: 37 | print("cannot %10s for %20s" % ("add_noise", test_file)) 38 | 39 | try: 40 | auga.fade_in_and_out(test_file) 41 | except Exception: 42 | print("cannot %10s for %20s" % ("fade_in_and_out", test_file)) 43 | 44 | try: 45 | auga.apply_gain(test_file, -100) 46 | except Exception: 47 | print("cannot %10s for %20s" % ("apply_gain", test_file)) 48 | 49 | try: 50 | auga.apply_gain(test_file, -50) 51 | except Exception: 52 | print("cannot %10s for %20s" % ("apply_gain", test_file)) 53 | 54 | try: 55 | augf.convolve(test_file, "noise", 10**-2.75) 56 | except Exception: 57 | print("cannot %10s for %20s" % ("convolve", test_file)) 58 | 59 | try: 60 | augf.change_tone(test_file, .9) 61 | except Exception: 62 | print("cannot %10s for %20s" % ("change_tone", test_file)) 63 | 64 | try: 65 | augf.change_tone(test_file, 1.1) 66 | except Exception: 67 | print("cannot %10s for %20s" % ("change_tone", test_file)) 68 | 69 | 70 | if __name__ == "__main__": 71 | folder = "data/waves/" 72 | 73 | # collect paths to wave files 74 | wave_fnames = [os.path.join(root, file) 75 | for root, dirs, files in os.walk(folder) 76 | for file in files] 77 | 78 | # print 79 | print("-" * 61) 80 | print(" Start Augmenting ") 81 | print("-" * 61) 82 | 83 | # augment files 84 | for wave_fname in wave_fnames[:]: 85 | augment_file(wave_fname) 86 | print("-" * 61) 87 | -------------------------------------------------------------------------------- /paper/experiment/_3_get_features.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import numpy as np 4 | import pandas as pd 5 | import scipy.io.wavfile 6 | import python_speech_features as psf 7 | 8 | 9 | def get_file_features(wav_fname, num_ceps): 10 | """ 11 | Extract mfcc features from a file. 12 | """ 13 | # read wave 14 | fs, sig = scipy.io.wavfile.read(wav_fname) 15 | 16 | # get mfccs 17 | mfccs = psf.mfcc(sig, samplerate=fs, winlen=0.025, winstep=0.01, 18 | numcep=num_ceps, nfilt=26, nfft=512, lowfreq=0, 19 | highfreq=None, preemph=0.97, ceplifter=22, 20 | appendEnergy=False) 21 | 22 | # compute mfcc means 23 | mfcc_means = np.round(mfccs.mean(axis=0), 3) 24 | return mfcc_means 25 | 26 | 27 | def extract_features(folder, num_ceps, fname, augmented=False): 28 | """ 29 | Extract features from files. 
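Walks `folder` recursively and computes the per-file mean MFCC vector, then writes one row per file (file name, `num_ceps` MFCC means, and the emotion label taken from the parent folder name) to the CSV file `fname`. When `augmented` is True, only files whose names contain "augment" are processed; otherwise only the original recordings are. Files that fail to process are returned as a list.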
30 | """ 31 | # collect paths to wave files 32 | wave_fnames = [os.path.join(root, file) 33 | for root, dirs, files in os.walk(folder) for file in files] 34 | 35 | # init features and errors lists 36 | features = [] 37 | errors_caused = [] 38 | 39 | # in case augmented data is processed 40 | if augmented: wave_fnames = [fname for fname in wave_fnames if "augment" in fname] 41 | else: wave_fnames = [fname for fname in wave_fnames if "augment" not in fname] 42 | 43 | # get voice features 44 | for wave_fname in wave_fnames[:]: 45 | try: 46 | feats = get_file_features(wave_fname, num_ceps) 47 | features.append([wave_fname] + [x for x in list(feats)] + [wave_fname.split("/")[-2]]) 48 | except Exception: 49 | print("Error: exception occurred when processing ", wave_fname) 50 | errors_caused.append(wave_fname) 51 | 52 | # define column names for csv 53 | column_names = ["file_name"] + ["mfcc" + str(i) for i in range(num_ceps)] + ["emotion"] 54 | 55 | # export results to file 56 | data = pd.DataFrame(features, columns=column_names) 57 | data.to_csv(fname) 58 | return errors_caused 59 | 60 | 61 | if __name__ == "__main__": 62 | _ = extract_features(folder="data/waves", num_ceps=13, fname="data/features.csv") 63 | _ = extract_features(folder="data/waves", num_ceps=13, fname="data/augmented_features.csv", augmented=True) 64 | -------------------------------------------------------------------------------- /paper/experiment/_4_run_experiment.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import time 4 | import warnings 5 | import numpy as np 6 | import pandas as pd 7 | from utils import cmat 8 | from sklearn import metrics 9 | import matplotlib.pyplot as plt 10 | from utils.classifiers_config import classifiers 11 | from sklearn.metrics import classification_report 12 | from sklearn.preprocessing import StandardScaler, MinMaxScaler 13 | from utils.dataproc import get_data, balance_dataset, pickle_save 14 | from sklearn.model_selection import cross_val_score, train_test_split 15 | 16 | warnings.filterwarnings("ignore") 17 | 18 | # global variables 19 | scalers = {"Standard": StandardScaler(), "MinMax": MinMaxScaler()} 20 | 21 | 22 | def train_model(data, model_fname, scale, classifier_name, visualizations=False): 23 | """ 24 | Train ML model.
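Balances the classes, makes a 70/30 train/test split, optionally min-max scales the features, then fits the classifier selected by `classifier_name`, reports 5-fold cross-validation and test scores together with a classification report, and finally pickles the fitted model under `models/<model_fname>`.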
25 | """ 26 | # drop filenames 27 | data = data.drop(["file_name"], axis=1) 28 | # balance data set 29 | balanced_data, X, y, column_names = balance_dataset(data) 30 | 31 | # split data 32 | x_train, x_test, y_train, y_test = train_test_split(X, y, 33 | test_size=0.3, 34 | random_state=22, 35 | shuffle=True) 36 | 37 | # init scaler and fit data to scaler 38 | if scale: 39 | scaler = MinMaxScaler() 40 | scaler.fit(x_train) 41 | x_train = scaler.transform(x_train) 42 | x_test = scaler.transform(x_test) 43 | 44 | # run classification 45 | t = time.time() 46 | classifier = classifiers[classifier_name] 47 | try: 48 | # initialize the classifier 49 | clf = classifier 50 | clf.fit(x_train, y_train) 51 | 52 | # training duration 53 | training_duration = time.time() - t 54 | 55 | # score using cross-validation 56 | cval_scores = cross_val_score(clf, x_train, y_train, cv=5) 57 | test_score = clf.score(X=x_test, y=y_test) 58 | 59 | # print scores 60 | print("----------------------------------------------------------") 61 | print(classifier_name) 62 | print("----------------------------------------------------------") 63 | print("Cross-validation scores : ", np.round(cval_scores, 3)) 64 | print("Testing score : ", np.round(test_score, 3)) 65 | print("----------------------------------------------------------") 66 | 67 | # predict data 68 | y_pred = clf.predict(x_test) 69 | target_names = data.emotion.unique() 70 | print(classification_report(y_test, y_pred, target_names=target_names)) 71 | 72 | # compute accuracy and confusion matrix 73 | accuracy = metrics.accuracy_score(y_test, y_pred) 74 | confusion_matrix = metrics.confusion_matrix(y_test, y_pred) 75 | 76 | if visualizations: 77 | # plot non-normalized confusion matrix for testing set 78 | cmat.plot_confusion_matrix(y_test, 79 | y_pred, 80 | classes=y.emotion.unique(), 81 | normalize=False, 82 | title="Confusion matrix, without normalization") 83 | plt.show() 84 | 85 | except Exception as e: 86 | print("Error: failed when testing ", classifier_name) 87 | print(e) 88 | return 89 | 90 | print("Training's duration is", np.round(time.time() - t, 3)) 91 | # export model to file 92 | pickle_save(clf, "models/" + model_fname) 93 | 94 | 95 | if __name__ == "__main__": 96 | available_classifiers = ['K-Nearest Neighbors (distance weights)', 97 | 'K-Nearest Neighbors (uniform weights)', 98 | 'Gaussian Process', 'Decision Tree', 99 | 'Random Forest', 'AdaBoost', 'Naive Bayes', 100 | 'Quadratic Discriminant Analysis', 101 | 'Linear SVM', 'MLP Classifier', 102 | 'Extra Trees Classifier'] 103 | 104 | # get data from original and augmented files 105 | original_data = get_data("data/features.csv")[0] 106 | augmented_data = get_data("data/augmented_features.csv")[0] 107 | all_data = pd.concat([original_data, augmented_data]) 108 | 109 | # make dir 110 | if not os.path.isdir("models"): os.mkdir("models") 111 | 112 | # experiment 113 | for clf in available_classifiers[:]: 114 | try: 115 | print("----------------------------------------------------------") 116 | print("******************* NO AUGMENTATION **********************") 117 | # train using original data, then original + augmented data 118 | train_model(original_data, 119 | model_fname="_".join(clf.split(" ")) + "_with_min_max.model", 120 | scale=False, 121 | classifier_name=clf, 122 | visualizations=False) 123 | 124 | print("----------------------------------------------------------") 125 | print("******************* WITH AUGMENTATION ********************") 126 | # train using original + augmented
data 127 | train_model(all_data, 128 | model_fname="_".join(clf.split(" ")) + "_with_min_max.model", 129 | scale=False, 130 | classifier_name=clf, 131 | visualizations=False) 132 | 133 | except Exception as e: 134 | print("ERROR: ", clf) 135 | print(e) 136 | -------------------------------------------------------------------------------- /paper/experiment/results.txt: -------------------------------------------------------------------------------- 1 | ---------------------------------------------------------- 2 | ******************* NO AUGMENTATION ********************** 3 | ---------------------------------------------------------- 4 | K-Nearest Neighbors (distance weights) 5 | ---------------------------------------------------------- 6 | Cross-validation scores : [0.667 0.578 0.667 0.6 0.667] 7 | Testing score : 0.629 8 | ---------------------------------------------------------- 9 | precision recall f1-score support 10 | 11 | neutral 0.67 0.86 0.75 14 12 | anger 0.57 0.31 0.40 13 13 | fear 0.64 0.60 0.62 15 14 | disgust 0.67 0.67 0.67 12 15 | sadness 0.56 0.47 0.51 19 16 | boredom 0.42 0.73 0.53 11 17 | happiness 1.00 0.85 0.92 13 18 | 19 | accuracy 0.63 97 20 | macro avg 0.65 0.64 0.63 97 21 | weighted avg 0.65 0.63 0.62 97 22 | 23 | Training's duration is 1.125 24 | ---------------------------------------------------------- 25 | ******************* WITH AUGMENTATION ******************** 26 | ---------------------------------------------------------- 27 | K-Nearest Neighbors (distance weights) 28 | ---------------------------------------------------------- 29 | Cross-validation scores : [0.838 0.836 0.8 0.798 0.871] 30 | Testing score : 0.867 31 | ---------------------------------------------------------- 32 | precision recall f1-score support 33 | 34 | neutral 0.88 0.89 0.88 146 35 | anger 0.83 0.80 0.81 127 36 | fear 0.89 0.90 0.89 151 37 | disgust 0.84 0.87 0.86 127 38 | sadness 0.87 0.81 0.84 150 39 | boredom 0.85 0.82 0.84 141 40 | happiness 0.91 0.98 0.95 124 41 | 42 | accuracy 0.87 966 43 | macro avg 0.87 0.87 0.87 966 44 | weighted avg 0.87 0.87 0.87 966 45 | 46 | Training's duration is 0.362 47 | ---------------------------------------------------------- 48 | ******************* NO AUGMENTATION ********************** 49 | ---------------------------------------------------------- 50 | K-Nearest Neighbors (uniform weights) 51 | ---------------------------------------------------------- 52 | Cross-validation scores : [0.622 0.644 0.578 0.667 0.467] 53 | Testing score : 0.567 54 | ---------------------------------------------------------- 55 | precision recall f1-score support 56 | 57 | neutral 0.67 0.57 0.62 14 58 | anger 0.50 0.31 0.38 13 59 | fear 0.61 0.73 0.67 15 60 | disgust 0.45 0.42 0.43 12 61 | sadness 0.50 0.42 0.46 19 62 | boredom 0.44 0.73 0.55 11 63 | happiness 0.79 0.85 0.81 13 64 | 65 | accuracy 0.57 97 66 | macro avg 0.57 0.57 0.56 97 67 | weighted avg 0.57 0.57 0.56 97 68 | 69 | Training's duration is 0.303 70 | ---------------------------------------------------------- 71 | ******************* WITH AUGMENTATION ******************** 72 | ---------------------------------------------------------- 73 | K-Nearest Neighbors (uniform weights) 74 | ---------------------------------------------------------- 75 | Cross-validation scores : [0.803 0.738 0.741 0.78 0.771] 76 | Testing score : 0.777 77 | ---------------------------------------------------------- 78 | precision recall f1-score support 79 | 80 | neutral 0.79 0.79 0.79 146 81 | anger 0.71 0.61 0.66 127 82 | 
fear 0.86 0.86 0.86 151 83 | disgust 0.75 0.81 0.78 127 84 | sadness 0.75 0.69 0.72 150 85 | boredom 0.70 0.78 0.74 141 86 | happiness 0.87 0.90 0.89 124 87 | 88 | accuracy 0.78 966 89 | macro avg 0.78 0.78 0.78 966 90 | weighted avg 0.78 0.78 0.78 966 91 | 92 | Training's duration is 0.467 93 | ---------------------------------------------------------- 94 | ******************* NO AUGMENTATION ********************** 95 | ---------------------------------------------------------- 96 | Gaussian Process 97 | ---------------------------------------------------------- 98 | Cross-validation scores : [0.178 0.2 0.156 0.156 0.244] 99 | Testing score : 0.247 100 | ---------------------------------------------------------- 101 | precision recall f1-score support 102 | 103 | neutral 1.00 0.21 0.35 14 104 | anger 0.00 0.00 0.00 13 105 | fear 1.00 0.13 0.24 15 106 | disgust 1.00 0.08 0.15 12 107 | sadness 1.00 0.21 0.35 19 108 | boredom 0.50 0.09 0.15 11 109 | happiness 0.16 1.00 0.27 13 110 | 111 | accuracy 0.25 97 112 | macro avg 0.67 0.25 0.22 97 113 | weighted avg 0.70 0.25 0.23 97 114 | 115 | Training's duration is 0.778 116 | ---------------------------------------------------------- 117 | ******************* WITH AUGMENTATION ******************** 118 | ---------------------------------------------------------- 119 | Gaussian Process 120 | ---------------------------------------------------------- 121 | Cross-validation scores : [0.619 0.601 0.594 0.641 0.622] 122 | Testing score : 0.7 123 | ---------------------------------------------------------- 124 | precision recall f1-score support 125 | 126 | neutral 1.00 0.50 0.67 146 127 | anger 0.92 0.55 0.69 127 128 | fear 0.98 0.74 0.84 151 129 | disgust 0.99 0.72 0.83 127 130 | sadness 0.97 0.74 0.84 150 131 | boredom 0.97 0.68 0.80 141 132 | happiness 0.31 1.00 0.48 124 133 | 134 | accuracy 0.70 966 135 | macro avg 0.88 0.70 0.73 966 136 | weighted avg 0.89 0.70 0.74 966 137 | 138 | Training's duration is 63.522 139 | ---------------------------------------------------------- 140 | ******************* NO AUGMENTATION ********************** 141 | ---------------------------------------------------------- 142 | Decision Tree 143 | ---------------------------------------------------------- 144 | Cross-validation scores : [0.378 0.378 0.467 0.378 0.511] 145 | Testing score : 0.454 146 | ---------------------------------------------------------- 147 | precision recall f1-score support 148 | 149 | neutral 0.46 0.43 0.44 14 150 | anger 0.40 0.46 0.43 13 151 | fear 0.44 0.53 0.48 15 152 | disgust 0.36 0.33 0.35 12 153 | sadness 0.43 0.32 0.36 19 154 | boredom 0.38 0.45 0.42 11 155 | happiness 0.69 0.69 0.69 13 156 | 157 | accuracy 0.45 97 158 | macro avg 0.45 0.46 0.45 97 159 | weighted avg 0.45 0.45 0.45 97 160 | 161 | Training's duration is 0.037 162 | ---------------------------------------------------------- 163 | ******************* WITH AUGMENTATION ******************** 164 | ---------------------------------------------------------- 165 | Decision Tree 166 | ---------------------------------------------------------- 167 | Cross-validation scores : [0.787 0.796 0.749 0.776 0.756] 168 | Testing score : 0.764 169 | ---------------------------------------------------------- 170 | precision recall f1-score support 171 | 172 | neutral 0.81 0.73 0.77 146 173 | anger 0.73 0.72 0.73 127 174 | fear 0.75 0.77 0.76 151 175 | disgust 0.68 0.80 0.73 127 176 | sadness 0.74 0.67 0.70 150 177 | boredom 0.80 0.81 0.81 141 178 | happiness 0.85 0.86 0.86 124 179 | 180 
| accuracy 0.76 966 181 | macro avg 0.77 0.77 0.77 966 182 | weighted avg 0.77 0.76 0.76 966 183 | 184 | Training's duration is 0.205 185 | ---------------------------------------------------------- 186 | ******************* NO AUGMENTATION ********************** 187 | ---------------------------------------------------------- 188 | Random Forest 189 | ---------------------------------------------------------- 190 | Cross-validation scores : [0.689 0.6 0.556 0.578 0.511] 191 | Testing score : 0.598 192 | ---------------------------------------------------------- 193 | precision recall f1-score support 194 | 195 | neutral 0.48 1.00 0.65 14 196 | anger 0.60 0.23 0.33 13 197 | fear 0.82 0.60 0.69 15 198 | disgust 0.50 0.42 0.45 12 199 | sadness 0.75 0.32 0.44 19 200 | boredom 0.50 0.82 0.62 11 201 | happiness 0.75 0.92 0.83 13 202 | 203 | accuracy 0.60 97 204 | macro avg 0.63 0.61 0.57 97 205 | weighted avg 0.64 0.60 0.57 97 206 | 207 | Training's duration is 1.698 208 | ---------------------------------------------------------- 209 | ******************* WITH AUGMENTATION ******************** 210 | ---------------------------------------------------------- 211 | Random Forest 212 | ---------------------------------------------------------- 213 | Cross-validation scores : [0.882 0.9 0.891 0.885 0.873] 214 | Testing score : 0.887 215 | ---------------------------------------------------------- 216 | precision recall f1-score support 217 | 218 | neutral 0.87 0.94 0.90 146 219 | anger 0.90 0.83 0.87 127 220 | fear 0.94 0.90 0.92 151 221 | disgust 0.82 0.89 0.86 127 222 | sadness 0.90 0.81 0.86 150 223 | boredom 0.85 0.88 0.86 141 224 | happiness 0.93 0.96 0.94 124 225 | 226 | accuracy 0.89 966 227 | macro avg 0.89 0.89 0.89 966 228 | weighted avg 0.89 0.89 0.89 966 229 | 230 | Training's duration is 2.205 231 | ---------------------------------------------------------- 232 | ******************* NO AUGMENTATION ********************** 233 | ---------------------------------------------------------- 234 | AdaBoost 235 | ---------------------------------------------------------- 236 | Cross-validation scores : [0.333 0.422 0.267 0.311 0.289] 237 | Testing score : 0.309 238 | ---------------------------------------------------------- 239 | precision recall f1-score support 240 | 241 | neutral 0.53 0.57 0.55 14 242 | anger 0.33 0.08 0.12 13 243 | fear 0.00 0.00 0.00 15 244 | disgust 0.26 0.83 0.40 12 245 | sadness 0.50 0.16 0.24 19 246 | boredom 0.27 0.27 0.27 11 247 | happiness 0.38 0.38 0.38 13 248 | 249 | accuracy 0.31 97 250 | macro avg 0.33 0.33 0.28 97 251 | weighted avg 0.33 0.31 0.28 97 252 | 253 | Training's duration is 1.077 254 | ---------------------------------------------------------- 255 | ******************* WITH AUGMENTATION ******************** 256 | ---------------------------------------------------------- 257 | AdaBoost 258 | ---------------------------------------------------------- 259 | Cross-validation scores : [0.486 0.528 0.475 0.53 0.529] 260 | Testing score : 0.513 261 | ---------------------------------------------------------- 262 | precision recall f1-score support 263 | 264 | neutral 0.49 0.75 0.59 146 265 | anger 0.51 0.26 0.34 127 266 | fear 0.48 0.54 0.51 151 267 | disgust 0.52 0.49 0.50 127 268 | sadness 0.46 0.35 0.40 150 269 | boredom 0.49 0.47 0.48 141 270 | happiness 0.66 0.74 0.70 124 271 | 272 | accuracy 0.51 966 273 | macro avg 0.51 0.51 0.50 966 274 | weighted avg 0.51 0.51 0.50 966 275 | 276 | Training's duration is 3.889 277 | 
---------------------------------------------------------- 278 | ******************* NO AUGMENTATION ********************** 279 | ---------------------------------------------------------- 280 | Naive Bayes 281 | ---------------------------------------------------------- 282 | Cross-validation scores : [0.533 0.533 0.6 0.533 0.489] 283 | Testing score : 0.66 284 | ---------------------------------------------------------- 285 | precision recall f1-score support 286 | 287 | neutral 0.56 0.64 0.60 14 288 | anger 0.88 0.54 0.67 13 289 | fear 0.67 0.53 0.59 15 290 | disgust 0.62 0.83 0.71 12 291 | sadness 0.60 0.47 0.53 19 292 | boredom 0.62 0.73 0.67 11 293 | happiness 0.76 1.00 0.87 13 294 | 295 | accuracy 0.66 97 296 | macro avg 0.67 0.68 0.66 97 297 | weighted avg 0.67 0.66 0.65 97 298 | 299 | Training's duration is 0.032 300 | ---------------------------------------------------------- 301 | ******************* WITH AUGMENTATION ******************** 302 | ---------------------------------------------------------- 303 | Naive Bayes 304 | ---------------------------------------------------------- 305 | Cross-validation scores : [0.634 0.643 0.647 0.625 0.658] 306 | Testing score : 0.608 307 | ---------------------------------------------------------- 308 | precision recall f1-score support 309 | 310 | neutral 0.63 0.77 0.70 146 311 | anger 0.54 0.48 0.51 127 312 | fear 0.69 0.71 0.70 151 313 | disgust 0.48 0.48 0.48 127 314 | sadness 0.58 0.42 0.49 150 315 | boredom 0.57 0.61 0.59 141 316 | happiness 0.72 0.77 0.75 124 317 | 318 | accuracy 0.61 966 319 | macro avg 0.60 0.61 0.60 966 320 | weighted avg 0.60 0.61 0.60 966 321 | 322 | Training's duration is 0.064 323 | ---------------------------------------------------------- 324 | ******************* NO AUGMENTATION ********************** 325 | ---------------------------------------------------------- 326 | Quadratic Discriminant Analysis 327 | ---------------------------------------------------------- 328 | Cross-validation scores : [0.489 0.467 0.533 0.644 0.622] 329 | Testing score : 0.608 330 | ---------------------------------------------------------- 331 | precision recall f1-score support 332 | 333 | neutral 0.69 0.79 0.73 14 334 | anger 0.50 0.54 0.52 13 335 | fear 0.82 0.60 0.69 15 336 | disgust 0.47 0.67 0.55 12 337 | sadness 0.69 0.47 0.56 19 338 | boredom 0.31 0.36 0.33 11 339 | happiness 0.85 0.85 0.85 13 340 | 341 | accuracy 0.61 97 342 | macro avg 0.62 0.61 0.61 97 343 | weighted avg 0.63 0.61 0.61 97 344 | 345 | Training's duration is 0.037 346 | ---------------------------------------------------------- 347 | ******************* WITH AUGMENTATION ******************** 348 | ---------------------------------------------------------- 349 | Quadratic Discriminant Analysis 350 | ---------------------------------------------------------- 351 | Cross-validation scores : [0.741 0.765 0.734 0.712 0.7 ] 352 | Testing score : 0.764 353 | ---------------------------------------------------------- 354 | precision recall f1-score support 355 | 356 | neutral 0.77 0.77 0.77 146 357 | anger 0.73 0.72 0.73 127 358 | fear 0.73 0.86 0.79 151 359 | disgust 0.73 0.72 0.73 127 360 | sadness 0.73 0.63 0.67 150 361 | boredom 0.76 0.77 0.76 141 362 | happiness 0.91 0.89 0.90 124 363 | 364 | accuracy 0.76 966 365 | macro avg 0.77 0.76 0.76 966 366 | weighted avg 0.76 0.76 0.76 966 367 | 368 | Training's duration is 0.071 369 | ---------------------------------------------------------- 370 | ******************* NO AUGMENTATION ********************** 
371 | ---------------------------------------------------------- 372 | Linear SVM 373 | ---------------------------------------------------------- 374 | Cross-validation scores : [0.556 0.6 0.6 0.644 0.6 ] 375 | Testing score : 0.608 376 | ---------------------------------------------------------- 377 | precision recall f1-score support 378 | 379 | neutral 0.67 0.71 0.69 14 380 | anger 0.62 0.62 0.62 13 381 | fear 0.42 0.33 0.37 15 382 | disgust 0.30 0.25 0.27 12 383 | sadness 0.68 0.68 0.68 19 384 | boredom 0.64 0.82 0.72 11 385 | happiness 0.79 0.85 0.81 13 386 | 387 | accuracy 0.61 97 388 | macro avg 0.59 0.61 0.60 97 389 | weighted avg 0.59 0.61 0.60 97 390 | 391 | Training's duration is 0.044 392 | ---------------------------------------------------------- 393 | ******************* WITH AUGMENTATION ******************** 394 | ---------------------------------------------------------- 395 | Linear SVM 396 | ---------------------------------------------------------- 397 | Cross-validation scores : [0.681 0.694 0.712 0.721 0.684] 398 | Testing score : 0.693 399 | ---------------------------------------------------------- 400 | precision recall f1-score support 401 | 402 | neutral 0.70 0.82 0.75 146 403 | anger 0.59 0.54 0.57 127 404 | fear 0.76 0.68 0.72 151 405 | disgust 0.64 0.70 0.67 127 406 | sadness 0.66 0.55 0.60 150 407 | boredom 0.65 0.61 0.63 141 408 | happiness 0.83 0.97 0.89 124 409 | 410 | accuracy 0.69 966 411 | macro avg 0.69 0.70 0.69 966 412 | weighted avg 0.69 0.69 0.69 966 413 | 414 | Training's duration is 0.753 415 | ---------------------------------------------------------- 416 | ******************* NO AUGMENTATION ********************** 417 | ---------------------------------------------------------- 418 | MLP Classifier 419 | ---------------------------------------------------------- 420 | Cross-validation scores : [0.644 0.622 0.556 0.622 0.644] 421 | Testing score : 0.608 422 | ---------------------------------------------------------- 423 | precision recall f1-score support 424 | 425 | neutral 0.67 0.71 0.69 14 426 | anger 0.53 0.62 0.57 13 427 | fear 0.82 0.60 0.69 15 428 | disgust 0.58 0.58 0.58 12 429 | sadness 0.48 0.58 0.52 19 430 | boredom 0.44 0.36 0.40 11 431 | happiness 0.83 0.77 0.80 13 432 | 433 | accuracy 0.61 97 434 | macro avg 0.62 0.60 0.61 97 435 | weighted avg 0.62 0.61 0.61 97 436 | 437 | Training's duration is 1.082 438 | ---------------------------------------------------------- 439 | ******************* WITH AUGMENTATION ******************** 440 | ---------------------------------------------------------- 441 | MLP Classifier 442 | ---------------------------------------------------------- 443 | Cross-validation scores : [0.772 0.641 0.696 0.681 0.749] 444 | Testing score : 0.811 445 | ---------------------------------------------------------- 446 | precision recall f1-score support 447 | 448 | neutral 0.92 0.78 0.84 146 449 | anger 0.76 0.79 0.77 127 450 | fear 0.89 0.77 0.83 151 451 | disgust 0.74 0.87 0.80 127 452 | sadness 0.74 0.79 0.76 150 453 | boredom 0.74 0.77 0.76 141 454 | happiness 0.93 0.94 0.93 124 455 | 456 | accuracy 0.81 966 457 | macro avg 0.82 0.81 0.81 966 458 | weighted avg 0.82 0.81 0.81 966 459 | 460 | Training's duration is 8.049 461 | ---------------------------------------------------------- 462 | ******************* NO AUGMENTATION ********************** 463 | ---------------------------------------------------------- 464 | Extra Trees Classifier 465 | ---------------------------------------------------------- 466 
| Cross-validation scores : [0.711 0.578 0.533 0.644 0.711] 467 | Testing score : 0.567 468 | ---------------------------------------------------------- 469 | precision recall f1-score support 470 | 471 | neutral 0.50 0.79 0.61 14 472 | anger 0.29 0.15 0.20 13 473 | fear 0.59 0.67 0.62 15 474 | disgust 0.58 0.58 0.58 12 475 | sadness 0.83 0.26 0.40 19 476 | boredom 0.40 0.73 0.52 11 477 | happiness 0.92 0.92 0.92 13 478 | 479 | accuracy 0.57 97 480 | macro avg 0.59 0.59 0.55 97 481 | weighted avg 0.61 0.57 0.54 97 482 | 483 | Training's duration is 1.797 484 | ---------------------------------------------------------- 485 | ******************* WITH AUGMENTATION ******************** 486 | ---------------------------------------------------------- 487 | Extra Trees Classifier 488 | ---------------------------------------------------------- 489 | Cross-validation scores : [0.869 0.902 0.909 0.918 0.902] 490 | Testing score : 0.929 491 | ---------------------------------------------------------- 492 | precision recall f1-score support 493 | 494 | neutral 0.92 0.99 0.95 146 495 | anger 0.91 0.91 0.91 127 496 | fear 0.97 0.92 0.95 151 497 | disgust 0.87 0.96 0.91 127 498 | sadness 0.95 0.83 0.89 150 499 | boredom 0.94 0.92 0.93 141 500 | happiness 0.95 0.98 0.96 124 501 | 502 | accuracy 0.93 966 503 | macro avg 0.93 0.93 0.93 966 504 | weighted avg 0.93 0.93 0.93 966 505 | 506 | Training's duration is 1.853 507 | -------------------------------------------------------------------------------- /paper/experiment/sounds/ir_classroom.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/paper/experiment/sounds/ir_classroom.wav -------------------------------------------------------------------------------- /paper/experiment/sounds/ir_smartphone_mic.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/paper/experiment/sounds/ir_smartphone_mic.wav -------------------------------------------------------------------------------- /paper/experiment/sounds/tel_noise.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/paper/experiment/sounds/tel_noise.wav -------------------------------------------------------------------------------- /paper/experiment/sounds/white_noise.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/paper/experiment/sounds/white_noise.wav -------------------------------------------------------------------------------- /paper/experiment/utils/classifiers_config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import random 3 | from sklearn.svm import SVC 4 | import multiprocessing as mp 5 | from sklearn.naive_bayes import GaussianNB 6 | from sklearn.linear_model import SGDClassifier 7 | from sklearn.tree import DecisionTreeClassifier 8 | from sklearn.neural_network import MLPClassifier 9 | from sklearn.neighbors import KNeighborsClassifier 10 | from sklearn.gaussian_process import GaussianProcessClassifier 11 | from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis 12 | from 
sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier 13 | 14 | # init global variables 15 | n_jobs = mp.cpu_count() 16 | 17 | # init classifiers 18 | classifiers = { 19 | "K-Nearest Neighbors (distance weights)": 20 | KNeighborsClassifier(n_neighbors=5, 21 | weights='distance', 22 | algorithm='auto', 23 | leaf_size=30, 24 | p=2, 25 | metric='minkowski', 26 | metric_params=None, 27 | n_jobs=n_jobs), 28 | 29 | "K-Nearest Neighbors (uniform weights)": 30 | KNeighborsClassifier(n_neighbors=5, 31 | weights='uniform', 32 | algorithm='auto', 33 | leaf_size=30, 34 | p=2, 35 | metric='minkowski', 36 | metric_params=None, 37 | n_jobs=n_jobs), 38 | 39 | "Gaussian Process": 40 | GaussianProcessClassifier(kernel=None, 41 | optimizer='fmin_l_bfgs_b', 42 | n_restarts_optimizer=0, 43 | max_iter_predict=100, 44 | warm_start=False, 45 | copy_X_train=True, 46 | random_state=random.seed(42), 47 | multi_class='one_vs_rest', 48 | n_jobs=n_jobs), 49 | 50 | "Decision Tree": 51 | DecisionTreeClassifier(criterion='gini', 52 | splitter='best', 53 | max_depth=None, 54 | min_samples_split=2, 55 | min_samples_leaf=1, 56 | min_weight_fraction_leaf=0.0, 57 | max_features=None, 58 | random_state=random.seed(42), 59 | max_leaf_nodes=None, 60 | min_impurity_decrease=0.0, 61 | min_impurity_split=None, 62 | class_weight=None, 63 | presort='deprecated'), 64 | 65 | "Random Forest": 66 | RandomForestClassifier(n_estimators=100, 67 | criterion='gini', 68 | max_depth=None, 69 | min_samples_split=2, 70 | min_samples_leaf=1, 71 | min_weight_fraction_leaf=0.0, 72 | max_features='auto', 73 | max_leaf_nodes=None, 74 | min_impurity_decrease=0.0, 75 | min_impurity_split=None, 76 | bootstrap=True, 77 | oob_score=False, 78 | n_jobs=n_jobs, 79 | random_state=random.seed(42), 80 | verbose=0, 81 | warm_start=False, 82 | class_weight=None), 83 | 84 | "AdaBoost": 85 | AdaBoostClassifier(base_estimator=None, 86 | n_estimators=100, 87 | learning_rate=1.0, 88 | algorithm='SAMME.R', 89 | random_state=random.seed(42)), 90 | 91 | "Naive Bayes": 92 | GaussianNB(priors=None, var_smoothing=1e-09), 93 | "Quadratic Discriminant Analysis": 94 | QuadraticDiscriminantAnalysis(priors=None, 95 | reg_param=0.0, 96 | store_covariance=False, 97 | tol=0.0001), 98 | 99 | "Linear SVM": 100 | SVC(C=0.025, 101 | kernel='linear', 102 | degree=3, 103 | gamma='auto', 104 | coef0=0.0, 105 | shrinking=True, 106 | probability=False, 107 | tol=0.001, 108 | cache_size=200, 109 | class_weight=None, 110 | verbose=False, 111 | max_iter=-1, 112 | decision_function_shape='ovr', 113 | random_state=random.seed(42)), 114 | 115 | "RBF SVM": 116 | SVC(C=0.025, 117 | kernel='rbf', 118 | degree=3, 119 | gamma='auto', 120 | coef0=0.0, 121 | shrinking=True, 122 | probability=False, 123 | tol=0.001, 124 | cache_size=200, 125 | class_weight=None, 126 | verbose=False, 127 | max_iter=-1, 128 | decision_function_shape='ovr', 129 | random_state=random.seed(42)), 130 | 131 | "MLP Classifier": 132 | MLPClassifier(hidden_layer_sizes=(100, ), 133 | activation='relu', 134 | solver='adam', 135 | alpha=0.0001, 136 | batch_size='auto', 137 | learning_rate='constant', 138 | learning_rate_init=0.001, 139 | power_t=0.5, 140 | max_iter=200, 141 | shuffle=True, 142 | random_state=random.seed(42), 143 | tol=0.0001, 144 | verbose=False, 145 | warm_start=False, 146 | momentum=0.9, 147 | nesterovs_momentum=True, 148 | early_stopping=False, 149 | validation_fraction=0.1, 150 | beta_1=0.9, 151 | beta_2=0.999, 152 | epsilon=1e-08, 153 | n_iter_no_change=10), 154 | 155 | "Extra
Trees Classifier": 156 | ExtraTreesClassifier(n_estimators=100, 157 | criterion='gini', 158 | max_depth=None, 159 | min_samples_split=2, 160 | min_samples_leaf=1, 161 | min_weight_fraction_leaf=0.0, 162 | max_features='auto', 163 | max_leaf_nodes=None, 164 | min_impurity_decrease=0.0, 165 | min_impurity_split=None, 166 | bootstrap=False, 167 | oob_score=False, 168 | n_jobs=n_jobs, 169 | random_state=random.seed(42), 170 | verbose=0, 171 | warm_start=False, 172 | class_weight=None), 173 | 174 | "SGD Classifier": 175 | SGDClassifier(loss='hinge', 176 | penalty='l2', 177 | alpha=0.0001, 178 | l1_ratio=0.15, 179 | fit_intercept=True, 180 | max_iter=1000, 181 | tol=0.001, 182 | shuffle=True, 183 | verbose=0, 184 | epsilon=0.1, 185 | n_jobs=n_jobs, 186 | random_state=None, 187 | learning_rate='optimal', 188 | eta0=0.0, 189 | power_t=0.5, 190 | early_stopping=False, 191 | validation_fraction=0.1, 192 | n_iter_no_change=5, 193 | class_weight=None, 194 | warm_start=False, 195 | average=False) 196 | } 197 | -------------------------------------------------------------------------------- /paper/experiment/utils/cmat.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.metrics import confusion_matrix 4 | 5 | 6 | def plot_confusion_matrix(y_true, 7 | y_pred, 8 | classes, 9 | normalize=False, 10 | title=None, 11 | cmap=plt.cm.Blues): 12 | """ 13 | This function prints and plots the confusion matrix. 14 | Normalization can be applied by setting `normalize=True`. 15 | """ 16 | if not title: 17 | if normalize: 18 | title = 'Normalized confusion matrix' 19 | else: 20 | title = 'Confusion matrix, without normalization' 21 | 22 | # compute confusion matrix 23 | cm = confusion_matrix(y_true, y_pred) 24 | if normalize: 25 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 26 | print("Normalized confusion matrix") 27 | else: 28 | print('Confusion matrix, without normalization') 29 | 30 | print(np.round(cm, 3)) 31 | 32 | # plot results 33 | fig, ax = plt.subplots() 34 | im = ax.imshow(cm, interpolation='nearest', cmap=cmap) 35 | ax.figure.colorbar(im, ax=ax) 36 | 37 | # show all ticks 38 | ax.set(xticks=np.arange(cm.shape[1]), 39 | yticks=np.arange(cm.shape[0]), 40 | # label them with the respective list entries 41 | xticklabels=classes, 42 | yticklabels=classes, 43 | # set title and axis labels 44 | title=title, 45 | ylabel='True label', 46 | xlabel='Predicted label') 47 | 48 | # rotate the tick labels and set their alignment. 49 | plt.setp(ax.get_xticklabels(), 50 | rotation=45, 51 | ha="right", 52 | rotation_mode="anchor") 53 | 54 | # loop over data dimensions and create text annotations. 55 | fmt = '.2f' if normalize else 'd' 56 | thresh = cm.max() / 2. 
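# annotate every cell with its count; cells whose value exceeds half of the
# maximum get white text so the labels stay readable against the dark colormap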
--------------------------------------------------------------------------------
/paper/experiment/utils/cmat.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from sklearn.metrics import confusion_matrix
4 | 
5 | 
6 | def plot_confusion_matrix(y_true,
7 |                           y_pred,
8 |                           classes,
9 |                           normalize=False,
10 |                           title=None,
11 |                           cmap=plt.cm.Blues):
12 |     """
13 |     This function prints and plots the confusion matrix.
14 |     Normalization can be applied by setting `normalize=True`.
15 |     """
16 |     if not title:
17 |         if normalize:
18 |             title = 'Normalized confusion matrix'
19 |         else:
20 |             title = 'Confusion matrix, without normalization'
21 | 
22 |     # compute confusion matrix
23 |     cm = confusion_matrix(y_true, y_pred)
24 |     if normalize:
25 |         cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
26 |         print("Normalized confusion matrix")
27 |     else:
28 |         print('Confusion matrix, without normalization')
29 | 
30 |     print(np.round(cm, 3))
31 | 
32 |     # plot results
33 |     fig, ax = plt.subplots()
34 |     im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
35 |     ax.figure.colorbar(im, ax=ax)
36 | 
37 |     # show all ticks
38 |     ax.set(xticks=np.arange(cm.shape[1]),
39 |            yticks=np.arange(cm.shape[0]),
40 |            # label them with the respective list entries
41 |            xticklabels=classes,
42 |            yticklabels=classes,
43 |            # set title and axis labels
44 |            title=title,
45 |            ylabel='True label',
46 |            xlabel='Predicted label')
47 | 
48 |     # rotate the tick labels and set their alignment.
49 |     plt.setp(ax.get_xticklabels(),
50 |              rotation=45,
51 |              ha="right",
52 |              rotation_mode="anchor")
53 | 
54 |     # loop over data dimensions and create text annotations.
55 |     fmt = '.2f' if normalize else 'd'
56 |     thresh = cm.max() / 2.
57 |     for i in range(cm.shape[0]):
58 |         for j in range(cm.shape[1]):
59 |             ax.text(j,
60 |                     i,
61 |                     format(cm[i, j], fmt),
62 |                     ha="center",
63 |                     va="center",
64 |                     color="white" if cm[i, j] > thresh else "black")
65 |     fig.tight_layout()
66 |     plt.show()
67 |     return ax
68 | 
--------------------------------------------------------------------------------
/paper/experiment/utils/dataproc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import pickle
3 | import pandas as pd
4 | 
5 | 
6 | def pickle_save(object_to_save, fname):
7 |     """
8 |     Save data to file using pickle.
9 |     """
10 |     with open(fname, 'wb') as fobj:
11 |         pickle.dump(object_to_save, fobj)
12 | 
13 | def pickle_load(fname):
14 |     """
15 |     Load data from file using pickle.
16 |     """
17 |     # load the pickled object
18 |     with open(fname, 'rb') as fobj:
19 |         loaded_object = pickle.load(fobj)
20 |     return loaded_object
21 | 
22 | def get_data(file_path, drop_columns=True, drop_nans=True):
23 |     # load data
24 |     data = pd.read_csv(file_path)
25 | 
26 |     # define column names
27 |     column_names = list(data.columns)
28 | 
29 |     if drop_columns:
30 |         # keep only the feature, duration and label columns
31 |         for c in column_names:
32 |             if not(any(x in c for x in ["file_name", "mfcc", "duration", "emotion"])):
33 |                 del data[c]
34 | 
35 |     # drop erroneous rows
36 |     if drop_nans:
37 |         data = data.dropna()
38 | 
39 |     # round data
40 |     data = data.round(3)
41 | 
42 | 
43 |     # re-define column names after dropping
44 |     column_names = list(data.columns)
45 |     return data, column_names
46 | 
47 | 
48 | def balance_dataset(data):
49 |     # define column names
50 |     column_names = list(data.columns)
51 | 
52 |     # ensure an equal number of samples per class
53 |     samples_pro_emotion = {e: len(data[data.emotion == e]) for e in data.emotion.unique()}
54 |     balanced_data = pd.concat([data[data.emotion == e].sample(min(samples_pro_emotion.values()))
55 |                                for e in data.emotion.unique()],
56 |                               axis=0)
57 | 
58 | 
59 |     # split data into features and labels
60 |     X = balanced_data.iloc[:, :-1]
61 |     y = balanced_data.iloc[:, -1:].astype('category')
62 |     # print("%25s : %s" % ("Data with balanced sets", str(balanced_data.shape)))
63 |     return balanced_data, X, y, column_names
64 | 
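A sketch of how these helpers chain together with `plot_confusion_matrix` from `cmat.py`; the CSV path and the choice of classifier here are hypothetical, not taken from the experiment scripts:

```python
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# "features.csv" is a placeholder for the extracted-features file
data, columns = get_data("features.csv", drop_columns=True, drop_nans=True)
balanced_data, X, y, _ = balance_dataset(data)

X_train, X_test, y_train, y_test = train_test_split(
    X, y.values.ravel(), test_size=0.25, random_state=42)

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
plot_confusion_matrix(y_test, clf.predict(X_test),
                      classes=sorted(set(y_test)), normalize=True)
```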
--------------------------------------------------------------------------------
/paper/figs/bpass.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/paper/figs/bpass.png
--------------------------------------------------------------------------------
/paper/figs/hpass.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/paper/figs/hpass.png
--------------------------------------------------------------------------------
/paper/figs/lpass.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/paper/figs/lpass.png
--------------------------------------------------------------------------------
/paper/paper.bib:
--------------------------------------------------------------------------------
1 | @article{Burkhardt:2005,
2 | title = {A database of German emotional speech},
3 | author = {Burkhardt, F. and Paeschke, A. and Rolfes, M. and Sendlmeier, W. F. and Weiss, B.},
4 | journal = {INTERSPEECH},
5 | year = {2005},
6 | }
7 | 
8 | 
9 | @Misc{ffmpeg:2019,
10 | title = {FFmpeg tool (Version 4.2.2)},
11 | author = {{FFmpeg Developers}},
12 | year = {2019},
13 | note = {[Software]. [Online; accessed 14.02.2020]},
14 | url = {http://ffmpeg.org/},
15 | }
16 | 
17 | 
18 | 
19 | @article{scipy:2019,
20 | title = {SciPy 1.0: fundamental algorithms for scientific computing in {Python}},
21 | author = {Pauli Virtanen, Ralf Gommers, Travis E. Oliphant, Matt Haberland,
22 | Tyler Reddy, David Cournapeau, Evgeni Burovski, Pearu Peterson,
23 | Warren Weckesser, Jonathan Bright, Stefan van der Walt, Matthew Brett,
24 | Jarod N Wilson, K. Jarrod Millman, Nikolay Mayorov, Andrew Nelson, Eric Jones,
25 | Robert C. Kern, Eric Larson, Clary James Carey, Ilhan Polat, Y. F. Feng,
26 | Eric J Moore, J. Vanderplas, Denis Laxalde, Josef Perktold, Robert Cimrman,
27 | Ian Henriksen, E. A. Quintero, Charles R. Harris, Anne M. Archibald,
28 | Ant{\^o}nio H. Ribeiro, Fabian Pedregosa, Paul van Mulbregt,
29 | Aditya Vijaykumar,
30 | Alessandro Pietro Bardelli, Alex Rothberg, Andreas Hilboll, Andreas Kl{\"o}ckner,
31 | Anthony M. Scopatz, Antony H. C. Lee, Ariel Rokem, C. Nathan Woods, Chad Fulton,
32 | Charles Masson, Christian H{\"a}ggstr{\"o}m, Clark Fitzgerald, David A. Nicholson,
33 | David R. Hagen, Dmitrii V. Pasechnik, Emanuele Olivetti, Eric A. Martin, Eric Wieser,
34 | Fabrice Silva, Felix Lenders, Florian Wilhelm, Gert Young, Gavin A. Price, Gert-Ludwig Ingold,
35 | Gregory E. Allen, Gregory R. Lee, Herv{\'e} Audren, Irvin Probst, Joerg P. Dietrich,
36 | Jacob Silterra, Jim Webber, Janko Slavi{\vc}, Joel Nothman, Johannes B. Buchner,
37 | Johannes Kulick, Johannes L. Sch{\"o}nberger, Jos{\'e} Vin{\'i}cius de Miranda Cardoso,
38 | Joscha Reimer, Joseph F. Harrington, Juan Luis Cano Rodr{\'i}guez, Juan Nunez-Iglesias,
39 | Justin Kuczynski, Kevin Tritz, Martin Thoma, Matthew Newville, Matthias K{\"u}mmerer,
40 | Maximilian Bolingbroke, Michael Tartre, Mikhail Pak, Nathaniel J. Smith, Nikolai Nowaczyk,
41 | Nikolay Shebanov, Oleksandr Pavlyk, Per A. Brodtkorb, Perry Lee, Robert T. McGibbon,
42 | Roman Feldbauer, Sam M. Lewis, Sam Tygier, Scott Sievert, Sebastiano Vigna,
43 | Stefan Peterson, Surhud More, Tadeusz Pudlik, Takuya Oshima, Thomas Pingel,
44 | Thomas P. Robitaille, Thomas Spura, Thouis Raymond Jones, Tim Cera, Tim Leslie,
45 | Tiziano Zito, Tom Krauss, Utkarsh Upadhyay, Yaroslav O. Halchenko and Yoshiki V{\'a}zquez-Baeza},
46 | journal = {Nature Methods},
47 | year = {2019},
48 | pages = {1--12},
49 | url = {http://www.scipy.org/},
50 | note = {[Online; accessed 14.02.2020]},
51 | doi = {10.1038/s41592-019-0686-2},
52 | }
53 | 
54 | 
55 | @inproceedings{specaugment:2019,
56 | title = {SpecAugment: A Simple Augmentation Method for Automatic Speech Recognition},
57 | author = {Daniel S. Park and William Chan and Yu Zhang and Chung-Cheng Chiu and Barret Zoph and Ekin Dogus Cubuk and Quoc V. Le},
58 | year = {2019},
59 | booktitle = {INTERSPEECH},
60 | doi = {10.21437/interspeech.2019-2680},
61 | }
62 | 
63 | 
64 | @inproceedings{muda:2015,
65 | author = {McFee, B. and Humphrey, E.J.
and Bello, J.P.}, 66 | year = {2015}, 67 | title = {A software framework for musical data augmentation}, 68 | booktitle = {16th International Society for Music Information Retrieval Conference}, 69 | series = {ISMIR}, 70 | url = {https://github.com/bmcfee/muda}, 71 | } 72 | 73 | 74 | @incollection{pytorch:2019, 75 | title = {PyTorch: An Imperative Style, High-Performance Deep Learning Library}, 76 | author = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and Desmaison, Alban and Kopf, Andreas and Yang, Edward and DeVito, Zachary and Raison, Martin and Tejani, Alykhan and Chilamkurthy, Sasank and Steiner, Benoit and Fang, Lu and Bai, Junjie and Chintala, Soumith}, 77 | booktitle = {Advances in Neural Information Processing Systems 32}, 78 | editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alch\'{e}-Buc and E. Fox and R. Garnett}, 79 | pages = {8024--8035}, 80 | year = {2019}, 81 | publisher = {Curran Associates, Inc.}, 82 | url = {http://papers.neurips.cc/paper/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf}, 83 | } 84 | 85 | 86 | @article{scikitlearn:2011, 87 | title = {Scikit-learn: Machine Learning in Python}, 88 | author = {Fabian Pedregosa and Ga{\"e}l Varoquaux and Alexandre Gramfort and Vincent Michel and Bertrand Thirion and Olivier Grisel and Mathieu Blondel and Gilles Louppe and Peter Prettenhofer and Ron Weiss and Vincent Dubourg and Jacob VanderPlas and Alexandre Passos and David Cournapeau and Matthieu Brucher and Matthieu Perrot and Edouard Duchesnay}, 89 | journal = {J. Mach. Learn. Res.}, 90 | year = {2011}, 91 | volume = {12}, 92 | pages = {2825-2830} 93 | } 94 | 95 | 96 | @software{jameslyons:2020, 97 | author = {James Lyons and Darren Yow-Bang Wang and Gianluca and Hanan Shteingart and Erik Mavrinac and Yash Gaurkar and Watcharapol Watcharawisetkul and Sam Birch and Lu Zhihe and Josef Hölzl and Janis Lesinskis and Henrik Almér and Chris Lord and Adam Stark}, 98 | title = {jameslyons/python\_speech\_features: release v0.6.1}, 99 | month = {jan}, 100 | year = {2020}, 101 | publisher = {Zenodo}, 102 | version = {0.6.1}, 103 | doi = {10.5281/zenodo.3607820}, 104 | url = {10.5281/zenodo.3607820} 105 | } 106 | 107 | 108 | @article{milner:2006, 109 | title = {Clean speech reconstruction from MFCC vectors and fundamental frequency using an integrated front-end}, 110 | journal = {Speech Communication}, 111 | volume = {48}, 112 | number = {6}, 113 | pages = {697 - 715}, 114 | year = {2006}, 115 | issn = {0167-6393}, 116 | doi = {10.1016/j.specom.2005.10.004}, 117 | url = {http://www.sciencedirect.com/science/article/pii/S0167639305002359}, 118 | author = {Ben Milner and Xu Shao} 119 | } 120 | 121 | 122 | @inproceedings{kishore:2013, 123 | author = {K. V. {Krishna Kishore} and P. {Krishna Satish}}, 124 | booktitle = {2013 3rd IEEE International Advance Computing Conference (IACC)}, 125 | title = {Emotion recognition in speech using MFCC and wavelet features}, 126 | year = {2013}, 127 | volume = {}, 128 | number = {}, 129 | pages = {842-847}, 130 | doi = {10.1109/IAdCC.2013.6514336}, 131 | ISSN = {null}, 132 | month = {Feb} 133 | } 134 | 135 | 136 | @article{sreeram:2015, 137 | author = {Sreeram, Lalitha and Geyasruti, D. 
and Narayanan, Ramachandran and M, Shravani}, 138 | year = {2015}, 139 | month = {12}, 140 | pages = {29-35}, 141 | title = {Emotion Detection Using MFCC and Cepstrum Features}, 142 | volume = {70}, 143 | journal = {Procedia Computer Science}, 144 | doi = {10.1016/j.procs.2015.10.020} 145 | } 146 | 147 | 148 | @inproceedings{dahake:2016, 149 | author = {P. P. {Dahake} and K. {Shaw} and P. {Malathi}}, 150 | booktitle = {2016 International Conference on Automatic Control and Dynamic Optimization Techniques (ICACDOT)}, 151 | title = {Speaker dependent speech emotion recognition using MFCC and Support Vector Machine}, 152 | year = {2016}, 153 | volume = {}, 154 | number = {}, 155 | pages = {1080-1084}, 156 | doi = {10.1109/ICACDOT.2016.7877753}, 157 | ISSN = {null}, 158 | month = {Sep.}, 159 | } 160 | 161 | 162 | @inproceedings{kandali:2008, 163 | author = {A. B. {Kandali} and A. {Routray} and T. K. {Basu}}, 164 | booktitle = {TENCON 2008 - 2008 IEEE Region 10 Conference}, 165 | title = {Emotion recognition from Assamese speeches using MFCC features and GMM classifier}, 166 | year = {2008}, 167 | volume = {}, 168 | number = {}, 169 | pages = {1-5}, 170 | doi = {10.1109/TENCON.2008.4766487}, 171 | ISSN = {2159-3450}, 172 | month = {Nov}, 173 | } 174 | 175 | 176 | @article{raju:2018, 177 | title = {Data Augmentation for Robust Keyword Spotting under Playback Interference}, 178 | author = {Anirudh Raju and Sankaran Panchapagesan and Xing Liu and Arindam Mandal and Nikko Strom}, 179 | journal = {ArXiv}, 180 | year = {2018}, 181 | volume = {abs/1808.00563} 182 | } 183 | 184 | 185 | @book{self:2009, 186 | title = {Audio engineering: Know it all}, 187 | author = {Self, Douglas and Duncan, Ben and Sinclair, Ian and Brice, Richard and Hood, John Linsley and Singmin, Andrew and Davis, Don and Patronis, Eugene and Watkinson, John}, 188 | volume = {1}, 189 | year = {2009}, 190 | publisher = {Elsevier Inc.}, 191 | address = {Oxford, UK}, 192 | chapter = {8.}, 193 | pages = {277--278}, 194 | ISBN = {9780080949642} 195 | } 196 | 197 | @book{poularikas:1999, 198 | title = {The Handbook of Formulas and Tables for Signal Processing}, 199 | author = {Poularikas, Alexander D.}, 200 | year = {1999}, 201 | edition = {1}, 202 | volume = {1}, 203 | publisher = {CRC Press}, 204 | address = {Boca Raton, USA}, 205 | chapter = {7}, 206 | pages = {137--138}, 207 | ISBN = {9781315219707}, 208 | doi = {10.1201/9781315219707} 209 | } 210 | 211 | 212 | @MastersThesis{shelvock:2012, 213 | author = {Shelvock, Matt}, 214 | title = {{Audio Mastering as Musical Practice}}, 215 | school = {The University of Western Ontario: The School of Graduate and Postdoctoral Studies}, 216 | address = {London, Ontario, Canada}, 217 | year = {2012}, 218 | url = {https://ir.lib.uwo.ca/etd/530}, 219 | } 220 | 221 | 222 | @misc{hari:2012, 223 | author = {Hari Balakrishnan and George Verghese}, 224 | title = {{6.02 Introduction to EECS II}}, 225 | journal = {Digital Communication Systems}, 226 | month = {Fall}, 227 | year = {2012}, 228 | school = {}, 229 | publisher = {Massachusetts Institute of Technology: MIT OpenCourseWare, \url{https://ocw.mit.edu}. License: \href{https://creativecommons.org/licenses/by-nc-sa/4.0/}{Creative Commons BY-NC-SA}}, 230 | } 231 | 232 | 233 | @misc{bernsee:2005, 234 | title = {Time Stretching And Pitch Shifting of Audio Signals An Overview}, 235 | author = {Stephan M. 
Bernsee},
236 | year = {2005},
237 | url = {http://blogs.zynaptiq.com/bernsee/time-pitch-overview/},
238 | }
239 | 
240 | 
241 | @misc{por:2019,
242 | title = {Nyquist–Shannon sampling theorem},
243 | author = {Emiel Por and Maaike van Kooten and Vanja Sarkovic},
244 | month = {May},
245 | year = {2019},
246 | url = {https://home.strw.leidenuniv.nl/~por/AOT2019/docs/AOT_2019_Ex13_NyquistTheorem.pdf},
247 | }
248 | 
--------------------------------------------------------------------------------
/paper/paper.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: 'Pydiogment: A Python package for audio augmentation'
3 | tags:
4 |   - Python
5 |   - Signal processing
6 |   - Data augmentation
7 |   - Audio classification
8 | authors:
9 |   - name: Ayoub Malek
10 |     orcid: 0000-0003-0872-7098
11 |     affiliation: "1"
12 |   - name: Hasna Marwa Malek
13 |     orcid: 0000-0003-0872-7098
14 |     affiliation: "2"
15 | affiliations:
16 |  - name: Technical University of Munich
17 |    index: 1
18 |  - name: Grenoble Institute of Technology
19 |    index: 2
20 | date: 25 February 2020
21 | bibliography: paper.bib
22 | ---
23 | 
24 | # Summary
25 | Audio data augmentation is a key step in training Machine Learning (ML) models to solve audio classification tasks.
26 | It is applied to increase the quality and size of the labeled training dataset, in order to improve the recognition accuracy.
27 | In essence, data augmentation deforms existing labeled samples to generate new ones, enlarging the dataset for better training.
28 | Unlike image augmentation, audio augmentation remains only marginally explored in research, and most deformation strategies manipulate computed spectrograms rather than the raw audio. With the exception of a few libraries constrained to work with PyTorch [@pytorch:2019], most existing tools in this context either act on spectrograms, such as Google's SpecAugment [@specaugment:2019], or are developed for music data augmentation, like muda [@muda:2015]. This paper describes version 0.1.0 of `Pydiogment`: a Python package for audio augmentation based on the Scipy [@scipy:2019] and FFmpeg [@ffmpeg:2019] libraries.
29 | `Pydiogment` implements various augmentation techniques that can be used to improve the accuracy of common recognition tasks (speaker recognition, spoken emotion recognition, speech recognition, etc.) and to avoid over-fitting when training models.
30 | The paper provides a brief overview of the library's functionality, along with an emotion recognition experiment demonstrating the utility of the library.
31 | 
32 | # Implementation and theory
33 | `Pydiogment` includes three general categories of deformations / augmentations:
34 | 
35 | ## Amplitude based augmentations (`auga.py`)
36 | - **Apply Gain:** This deformation can be described as an amplification of the signal and the noise by applying a given gain (in dB) to the input signal. Note that excessive gain application can result in clipping [@self:2009].
37 | 
38 | - **Add Fade:** adds fade-in and fade-out effects to the original signal. This is done by multiplying the original signal with a Hamming window: $y[n] = x[n] \cdot w[n]$, where $x[n]$ is the original signal, $y[n]$ is the augmented signal and $w[n]$ is the computed Hamming window [@poularikas:1999].
39 | 
40 | - **Normalize:** Normalization refers to the practice of applying a uniform amount of gain across a signal, where the signal-to-noise ratio and general dynamics levels remain unchanged [@shelvock:2012].
The normalization can be applied using the peak normalization method $y[n] = \frac{x[n]}{\max(|x[n]|)}$ or the Root Mean Square (RMS) approach $y[n] = \sqrt{\frac{N \cdot 10^{\frac{r}{10}}}{\sum_{i=0}^{N-1}x^2[i]}} \cdot x[n]$, where $x[n]$ is the original signal, $y[n]$ is the augmented signal, $N$ is the length of $x[n]$ and $r$ is the desired RMS level in dB.
41 | 
42 | - **Add Noise:** Additive White Gaussian Noise (AWGN) is added to the input signal based on a given signal-to-noise ratio (SNR) in dB (a short NumPy sketch of these amplitude-based deformations is given at the end of this section):
43 | \begin{equation}
44 | y[n] = x[n] + \sqrt{\frac{P_x}{P_{awgn}} \cdot 10^{-(\frac{SNR_{db}}{10})}} \cdot awgn[n]
45 | \end{equation}
46 | where $x[n]$ is the original signal, $y[n]$ is the augmented noisy signal, $awgn[n]$ is a random Gaussian white noise signal with zero mean and unit standard deviation, and $P_x$ & $P_{awgn}$ are respectively the signal power and the noise power [@hari:2012].
47 | 
48 | 
49 | ## Frequency based augmentation (`augf.py`)
50 | - **Change tone:** Tone and pitch are properties of sound that allow an ordering on a frequency-related scale. They are usually characterized by a fundamental frequency, which can be adjusted without changing the tempo in order to provide a deformed audio; this is also known as pitch shifting [@bernsee:2005].
51 | 
52 | - **Convolve:** This is also called reverberating the audio: it consists of a convolution of the original signal with a given Room Impulse Response (RIR) to simulate audio captured using far-field microphones in a different setup/channel, $y[n] = x[n] * rir[n]$, where $x[n]$ is the original signal, $y[n]$ is the augmented signal and $rir[n]$ is the room impulse response [@raju:2018].
53 | 
54 | - **Apply Filter:** apply various types of Scipy-based Butterworth filters (low-pass, high-pass or band-pass).
55 | 
56 | \begin{figure}[!htb]
57 | \minipage{0.32\textwidth}
58 | \includegraphics[width=\linewidth]{figs/lpass.png}
59 | \endminipage\hfill
60 | \minipage{0.32\textwidth}
61 | \includegraphics[width=\linewidth]{figs/bpass.png}
62 | \endminipage\hfill
63 | \minipage{0.32\textwidth}%
64 | \includegraphics[width=\linewidth]{figs/hpass.png}
65 | \endminipage
66 | \caption{Frequency-Gain graphs of the implemented Butterworth filters (left: low-pass, middle: band-pass, right: high-pass) with a low cutoff frequency of 300 $Hz$ and a high cutoff frequency of 3000 $Hz$.}
67 | \end{figure}
68 | 
69 | ## Time based augmentation (`augt.py`)
70 | - **Stretch Time:** also known as time compression/expansion, this is reciprocal to pitch shifting. Essentially, the audio is slowed down or accelerated based on a given coefficient [@bernsee:2005].
71 | 
72 | - **Shift Time:** this includes shifting the signal in a certain time direction to create augmented signals with different chronological orders, as well as reversing the whole signal.
73 | 
74 | - **Crop:** generates a randomly cropped audio based on the original signal and a minimum signal/audio length.
75 | 
76 | - **Remove Silence:** filters out silent frames from the input signal using the FFmpeg `silenceremove` filter [@ffmpeg:2019].
77 | 
78 | - **Resample:** with the help of Scipy, the input signal is resampled given an input sampling rate, with respect to the Nyquist–Shannon sampling theorem.
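As a companion to the amplitude-based formulas above, a minimal NumPy sketch of AWGN injection and RMS normalization (an illustrative re-derivation, not the package code itself; `x` is assumed to be a mono float signal):

```python
import numpy as np

x = np.random.uniform(-0.5, 0.5, 16000)       # stand-in mono signal

# AWGN injection for a target SNR (in dB)
snr_db = 10
awgn = np.random.randn(x.size)                # zero mean, unit standard deviation
p_x, p_awgn = np.mean(x ** 2), np.mean(awgn ** 2)
y_noisy = x + np.sqrt(p_x / p_awgn * 10 ** (-snr_db / 10)) * awgn

# RMS normalization to a target level r (in dB)
r = -6
a = np.sqrt(x.size * 10 ** (r / 10) / np.sum(x ** 2))
y_rms = a * x
print(20 * np.log10(np.sqrt(np.mean(y_rms ** 2))))   # ~ r
```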
79 | 
80 | The aforementioned augmentation strategies can be combined to generate various sub-strategies.
81 | However, it is crucial to maintain semantic validity when augmenting the data.
82 | *For example:* one cannot change tones when doing voice-based gender classification and still expect tone to be a separating feature of the predicted classes.
83 | 
84 | # Experiment & Results
85 | To demonstrate the utility of `Pydiogment`, we display its effect on a spoken emotion recognition task.
86 | We use the **Emo-DB** dataset [@Burkhardt:2005] as a starting point, which is a small German audio dataset simulating 7 different emotions (neutral, sadness, anger, boredom, fear, happiness, disgust). We choose the Mel-Frequency Cepstral Coefficients (MFCCs) [@milner:2006] as the characterizing low-level audio features due to their previously proven success on similar problems [@kandali:2008; @kishore:2013; @sreeram:2015; @dahake:2016]. The features are extracted using the python_speech_features library [@jameslyons:2020]. In a first phase, using the scikit-learn library [@scikitlearn:2011], we apply various recognition algorithms to the original data, such as K-Nearest Neighbors (KNN), random forests, decision trees, Quadratic Discriminant Analysis (QDA), Support Vector Machines (SVM), etc.
87 | In a second phase, we augment the data using `Pydiogment` by applying the following techniques (the corresponding calls are sketched after this list):
88 | 
89 | - slow down samples using a coefficient of $0.8$.
90 | - speed up samples using a coefficient of $1.2$.
91 | - randomly crop samples with a minimum length of $1$ second.
92 | - add noise with an SNR $= 10$ dB.
93 | - add a fade-in and fade-out effect.
94 | - apply a gain of $-100$ dB.
95 | - apply a gain of $-50$ dB.
96 | - convolve with a noise file using a level $= 10^{-2.75}$.
97 | - shift time by one second ($1$ sec) to the right (direction = right).
98 | - shift time by one second ($1$ sec) to the left (direction = left).
99 | - change tone with a tone coefficient equal to $0.9$.
100 | - change tone with a tone coefficient equal to $1.1$.
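Assuming a wave file `sample.wav` together with a noise/impulse-response file `tel_noise.wav` (both file names are placeholders, not experiment code), the listed techniques map to `Pydiogment` calls like:

```python
from pydiogment.auga import add_noise, apply_gain, fade_in_and_out
from pydiogment.augf import change_tone, convolve
from pydiogment.augt import random_cropping, shift_time, slow_down, speed

f = "sample.wav"                                  # placeholder input file
slow_down(f, coefficient=0.8)
speed(f, coefficient=1.2)
random_cropping(f, min_len=1)
add_noise(f, snr=10)
fade_in_and_out(f)
apply_gain(f, gain=-100)
apply_gain(f, gain=-50)
convolve(f, ir_fname="tel_noise.wav", level=10 ** -2.75)
shift_time(f, tshift=1, direction="right")
shift_time(f, tshift=1, direction="left")
change_tone(f, tone=0.9)
change_tone(f, tone=1.1)
```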
101 | 
102 | Then we re-run the same recognition algorithms on the augmented and original data. The following is a comparison of the results:
103 | \begin{table}[h]
104 | \begin{tabular}{lcc}
105 | \hline
106 | Machine Learning Algorithm & Accuracy (no augmentation) & Accuracy (with augmentation)\\
107 | \hline\hline
108 | AdaBoost                 & 0.309 & 0.513 \\
109 | Decision Tree            & 0.454 & 0.764 \\
110 | Extra Trees Classifier   & 0.588 & 0.916 \\
111 | Gaussian Process         & 0.247 & 0.700 \\
112 | KNN                      & 0.629 & 0.867 \\
113 | Linear SVM               & 0.608 & 0.693 \\
114 | MLP Classifier           & 0.608 & 0.811 \\
115 | Naive Bayes              & 0.577 & 0.610 \\
116 | QDA                      & 0.608 & 0.764 \\
117 | Random Forest            & 0.567 & 0.929 \\
118 | \hline
119 | \end{tabular}
120 | \caption{Accuracy comparison of results with and without data augmentation.}
121 | \end{table}
122 | 
123 | # Conclusion
124 | This paper introduced `Pydiogment`, a Python package for audio data augmentation, with diverse audio deformation strategies.
125 | These strategies aim to improve the accuracy of audio-based recognition systems by scaling the training dataset and increasing its quality and diversity.
126 | The utility of `Pydiogment` was demonstrated by its effect on a spoken emotion recognition task.
127 | In the stated experiment, augmenting the data with `Pydiogment` improved the recognition accuracy by up to 45 percentage points (Gaussian Process: from 0.247 to 0.700).
128 | 
129 | 
130 | # References
131 | 
--------------------------------------------------------------------------------
/paper/paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/paper/paper.pdf
--------------------------------------------------------------------------------
/paper/refs.md:
--------------------------------------------------------------------------------
1 | # This is a list of useful papers and links
2 | 
3 | - https://www.researchgate.net/publication/334279066_A_survey_on_Image_Data_Augmentation_for_Deep_Learning
4 | - https://www.researchgate.net/publication/325920702_Data_augmentation_for_improving_deep_learning_in_image_classification_problem
5 | - https://arxiv.org/pdf/1912.05472.pdf
--------------------------------------------------------------------------------
/pydiogment/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/pydiogment/__init__.py
--------------------------------------------------------------------------------
/pydiogment/auga.py:
--------------------------------------------------------------------------------
1 | """
2 | - Description: amplitude based augmentation techniques/manipulations for audio data.
3 | """
4 | import os
5 | import numpy as np
6 | from .utils.io import read_file, write_file
7 | 
8 | 
9 | def apply_gain(infile, gain):
10 |     """
11 |     Apply gain to infile.
12 | 
13 |     Args:
14 |         infile (str) : input filename/path.
15 |         gain (float) : gain in dB (both positive and negative).
16 |     """
17 |     # read input file
18 |     fs, x = read_file(filename=infile)
19 | 
20 |     # apply gain
21 |     x = np.copy(x)
22 |     x = x * (10**(gain / 10.0))
23 |     x = np.minimum(np.maximum(-1.0, x), 1.0)
24 |     x /= np.mean(np.abs(x))
25 | 
26 |     # export data to file
27 |     output_file_path = os.path.dirname(infile)
28 |     name_attribute = "_augmented_with_%s_gain.wav" % str(gain)
29 |     write_file(output_file_path=output_file_path,
30 |                input_file_name=infile,
31 |                name_attribute=name_attribute,
32 |                sig=x,
33 |                fs=fs)
34 | 
35 | 
36 | def add_noise(infile, snr):
37 |     """
38 |     Augment data using noise injection.
39 | 
40 |     Note:
41 |         It simply adds random values to the input file data based on the SNR.
42 | 
43 |     Args:
44 |         infile (str) : input filename/path.
45 |         snr (int) : signal to noise ratio in dB.
46 |     """
47 |     # read input file
48 |     fs, sig = read_file(filename=infile)
49 | 
50 |     # generate white noise
51 |     noise = np.random.randn(len(sig))
52 | 
53 |     # compute powers
54 |     noise_power = np.mean(np.power(noise, 2))
55 |     sig_power = np.mean(np.power(sig, 2))
56 | 
57 |     # compute snr and scaling factor
58 |     snr_linear = 10**(snr / 10.0)
59 |     noise_factor = (sig_power / noise_power) * (1 / snr_linear)
60 | 
61 |     # add noise
62 |     y = sig + np.sqrt(noise_factor) * noise
63 | 
64 |     # construct file names
65 |     output_file_path = os.path.dirname(infile)
66 |     name_attribute = "_augmented_%s_noisy.wav" % snr
67 | 
68 |     # export data to file
69 |     write_file(output_file_path=output_file_path,
70 |                input_file_name=infile,
71 |                name_attribute=name_attribute,
72 |                sig=y,
73 |                fs=fs)
74 | 
75 | 
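A quick sanity check for `add_noise` (a sketch: `tests/testfiles/test.wav` is used as an example input, the input is assumed to be mono, and the added noise is recovered by subtracting the original signal from the augmented one):

```python
import numpy as np
from scipy.io.wavfile import read
from pydiogment.auga import add_noise

add_noise("tests/testfiles/test.wav", snr=10)

_, x = read("tests/testfiles/test.wav")
_, y = read("tests/testfiles/test_augmented_10_noisy.wav")
noise = y - x.astype(float)
snr_est = 10 * np.log10(np.sum(x.astype(float) ** 2) / np.sum(noise ** 2))
print("estimated SNR: %.1f dB" % snr_est)     # should be close to 10
```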
76 | 
77 | def fade_in_and_out(infile):
78 |     """
79 |     Add a fade in and out effect to the audio file.
80 | 
81 |     Args:
82 |         infile (str) : input filename/path.
83 |     """
84 |     # read input file
85 |     fs, sig = read_file(filename=infile)
86 | 
87 |     # construct file names
88 |     output_file_path = os.path.dirname(infile)
89 |     name_attribute = "_augmented_fade_in_out.wav"
90 | 
91 |     # fade in and out
92 |     window = np.hamming(len(sig))
93 |     augmented_sig = window * sig
94 |     augmented_sig /= np.mean(np.abs(augmented_sig))
95 | 
96 |     # export data to file
97 |     write_file(output_file_path=output_file_path,
98 |                input_file_name=infile,
99 |                name_attribute=name_attribute,
100 |                sig=augmented_sig,
101 |                fs=fs)
102 | 
103 | 
104 | def normalize(infile, normalization_technique="peak", rms_level=0):
105 |     """
106 |     Normalize the signal given a certain technique (peak or rms).
107 | 
108 |     Args:
109 |         infile (str) : input filename/path.
110 |         normalization_technique (str) : type of normalization technique to use (default is peak).
111 |         rms_level (int) : rms level in dB.
112 |     """
113 |     # read input file
114 |     fs, sig = read_file(filename=infile)
115 | 
116 |     # normalize signal
117 |     if normalization_technique == "peak":
118 |         y = sig / np.max(np.abs(sig))
119 | 
120 |     elif normalization_technique == "rms":
121 |         # linear rms level and scaling factor
122 |         r = 10**(rms_level / 20.0)
123 |         a = np.sqrt( (len(sig) * r**2) / np.sum(sig**2) )
124 | 
125 |         # normalize
126 |         y = sig * a
127 | 
128 |     else:
129 |         raise ValueError("Unknown normalization_technique: %s" % normalization_technique)
130 | 
131 |     # construct file names
132 |     output_file_path = os.path.dirname(infile)
133 |     name_attribute = "_augmented_{}_normalized.wav".format(normalization_technique)
134 | 
135 |     # export data to file
136 |     write_file(output_file_path=output_file_path,
137 |                input_file_name=infile,
138 |                name_attribute=name_attribute,
139 |                sig=y,
140 |                fs=fs)
--------------------------------------------------------------------------------
/pydiogment/augf.py:
--------------------------------------------------------------------------------
1 | """
2 | - Description: frequency based augmentation techniques/manipulations for audio data.
3 | """
4 | import os
5 | import subprocess
6 | import numpy as np
7 | from .utils.filters import butter_filter
8 | from .utils.io import read_file, write_file
9 | 
10 | 
11 | def convolve(infile, ir_fname, level=0.5):
12 |     """
13 |     Apply convolution to infile using the given impulse response file.
14 | 
15 |     Args:
16 |         infile (str) : input filename/path.
17 |         ir_fname (str) : name of the impulse response file.
18 |         level (float) : mixing level between 0 and 1, default value = 0.5.
19 |     """
20 |     # read input file
21 |     fs1, x = read_file(filename=infile)
22 |     x = np.copy(x)
23 | 
24 |     # read the impulse response file
25 |     _, ir = read_file(filename=ir_fname)
26 | 
27 |     # apply convolution (wet/dry mix of the reverberated and original signals)
28 |     y = np.convolve(x, ir, 'full')[0:x.shape[0]] * level + x * (1 - level)
29 | 
30 |     # normalize
31 |     y /= np.mean(np.abs(y))
32 | 
33 |     # export data to file
34 |     output_file_path = os.path.dirname(infile)
35 |     name_attribute = "_augmented_{0}_convolved_with_level_{1}.wav".format(os.path.basename(ir_fname.split(".")[0]),
36 |                                                                           level)
37 |     write_file(output_file_path=output_file_path,
38 |                input_file_name=infile,
39 |                name_attribute=name_attribute,
40 |                sig=y,
41 |                fs=fs1)
42 | 
43 | 
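A side note on the design: `np.convolve` runs in O(N·M) time, which gets slow for long impulse responses. An FFT-based drop-in for the mixing line above (a sketch of an alternative, not what `Pydiogment` currently uses; `x`, `ir` and `level` are as in `convolve()`) would be:

```python
from scipy.signal import fftconvolve

# same wet/dry mix as in convolve() above, but in O(N log N)
y = fftconvolve(x, ir, mode="full")[:x.shape[0]] * level + x * (1 - level)
```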
44 | def change_tone(infile, tone):
45 |     """
46 |     Change the tone of an audio file.
47 | 
48 |     Args:
49 |         infile (str) : input audio filename.
50 |         tone (float) : tone coefficient (e.g. 0.9 to lower, 1.1 to raise the tone).
51 |     """
52 |     # read input file
53 |     fs, _ = read_file(filename=infile)
54 | 
55 |     # prepare file names for the tone changing command
56 |     input_file_name = os.path.basename(infile).split(".wav")[0]
57 |     output_file_path = os.path.dirname(infile)
58 |     name_attribute = "_augmented_%s_toned.wav" % str(tone)
59 |     outfile = os.path.join(output_file_path, input_file_name + name_attribute)
60 | 
61 |     # change tone
62 |     tone_change_command = ["ffmpeg", "-i", infile, "-af",
63 |                            "asetrate=" + str(fs) + "*" + str(tone) + ",aresample=" + str(fs),
64 |                            outfile]
65 | 
66 |     _ = subprocess.Popen(tone_change_command,
67 |                          stdin=subprocess.PIPE,
68 |                          stdout=subprocess.PIPE,
69 |                          stderr=subprocess.PIPE)
70 | 
71 | 
72 | def apply_filter(infile, filter_type, low_cutoff_freq, high_cutoff_freq=None, order=5):
73 |     """
74 |     Apply a certain type of Butterworth filter on the input audio.
75 | 
76 |     Args:
77 |         infile (str) : input audio filename.
78 |         filter_type (str) : type of the filter to apply ("low", "high" or "band").
79 |         low_cutoff_freq (float) : the low cut-off frequency of the filter.
80 |         high_cutoff_freq (float) : the high cut-off frequency of the filter.
81 |         order (int) : filter order defining its accuracy.
82 |     """
83 |     # read input file
84 |     fs, sig = read_file(filename=infile)
85 | 
86 |     # apply filter
87 |     y = butter_filter(sig=sig, fs=fs, ftype=filter_type,
88 |                       low_cut=low_cutoff_freq,
89 |                       high_cut=high_cutoff_freq,
90 |                       order=order)
91 | 
92 |     # export data to file
93 |     output_file_path = os.path.dirname(infile)
94 |     name_attribute = "_augmented_{0}_pass_filtered.wav".format(filter_type)
95 |     write_file(output_file_path=output_file_path,
96 |                input_file_name=infile,
97 |                name_attribute=name_attribute,
98 |                sig=y,
99 |                fs=fs)
100 | 
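Tying `apply_filter` back to the Butterworth figure in the paper (low cutoff 300 Hz, high cutoff 3000 Hz), a typical call would look like this (`sample.wav` is a placeholder):

```python
from pydiogment.augf import apply_filter

# band-pass between 300 Hz and 3000 Hz with a 5th-order Butterworth filter;
# writes sample_augmented_band_pass_filtered.wav next to the input file
apply_filter("sample.wav", filter_type="band",
             low_cutoff_freq=300, high_cutoff_freq=3000, order=5)
```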
--------------------------------------------------------------------------------
/pydiogment/augt.py:
--------------------------------------------------------------------------------
1 | """
2 | - Description: time based augmentation techniques/manipulations for audio data.
3 | """
4 | import os
5 | import math
6 | import random
7 | import warnings
8 | import subprocess
9 | import numpy as np
10 | from .utils.io import read_file, write_file
11 | 
12 | 
13 | def eliminate_silence(infile):
14 |     """
15 |     Eliminate silence from a voice file using the ffmpeg library.
16 | 
17 |     Args:
18 |         infile (str) : Path to get the original voice file from.
19 | 
20 |     Returns:
21 |         tuple of the file duration with silence and the duration
22 |         without silence, as reported by ffprobe.
23 |     """
24 |     # define output name if none specified
25 |     output_path = infile.split(".wav")[0] + "_augmented_without_silence.wav"
26 | 
27 |     # filter silence in wav
28 |     remove_silence_command = ["ffmpeg", "-i", infile,
29 |                               "-af",
30 |                               "silenceremove=stop_periods=-1:stop_duration=0.25:stop_threshold=-36dB",
31 |                               "-acodec", "pcm_s16le",
32 |                               "-ac", "1", output_path]
33 |     out = subprocess.Popen(remove_silence_command,
34 |                            stdout=subprocess.PIPE,
35 |                            stderr=subprocess.PIPE)
36 |     out.wait()
37 | 
38 |     with_silence_duration = os.popen(
39 |         "ffprobe -i '" + infile +
40 |         "' -show_format -v quiet | sed -n 's/duration=//p'").read()
41 |     no_silence_duration = os.popen(
42 |         "ffprobe -i '" + output_path +
43 |         "' -show_format -v quiet | sed -n 's/duration=//p'").read()
44 |     return with_silence_duration, no_silence_duration
45 | 
46 | 
47 | def random_cropping(infile, min_len=1):
48 |     """
49 |     Crop the infile with an input minimum duration.
50 | 
51 |     Args:
52 |         infile (str) : Input filename.
53 |         min_len (float) : Minimum duration of the randomly cropped excerpt.
54 |     """
55 |     fs, x = read_file(filename=infile)
56 |     t_end = x.size / fs
57 |     if (t_end > min_len):
58 |         # get start and end time
59 |         start = random.uniform(0.0, t_end - min_len)
60 |         end = random.uniform(start + min_len, t_end)
61 | 
62 |         # crop data
63 |         y = x[int(math.floor(start * fs)):int(math.ceil(end * fs))]
64 | 
65 |         # construct file names
66 |         output_file_path = os.path.dirname(infile)
67 |         name_attribute = "_augmented_randomly_cropped_%s.wav" % str(min_len)
68 | 
69 |         # export data to file
70 |         write_file(output_file_path=output_file_path,
71 |                    input_file_name=infile,
72 |                    name_attribute=name_attribute,
73 |                    sig=y,
74 |                    fs=fs)
75 | 
76 |     else:
77 |         warning_msg = """
78 |         min_len provided is greater than the duration of the audio file.
79 |         """
80 |         warnings.warn(warning_msg)
81 | 
82 | 
83 | def slow_down(input_file, coefficient=0.8):
84 |     """
85 |     Slow down or stretch a wave.
86 | 
87 |     Args:
88 |         input_file (str) : Input filename.
89 |         coefficient (float) : coefficient characterizing the slowing degree.
90 |     """
91 |     # set-up variables for paths and file names
92 |     name_attribute = "_augmented_slowed.wav"
93 |     output_file = input_file.split(".wav")[0] + name_attribute
94 | 
95 |     # apply slowing command and wait for ffmpeg to finish
96 |     slowing_command = ["ffmpeg", "-i", input_file, "-filter:a",
97 |                        "atempo={0}".format(str(coefficient)),
98 |                        output_file]
99 | 
100 |     p = subprocess.Popen(slowing_command,
101 |                          stdin=subprocess.PIPE,
102 |                          stdout=subprocess.PIPE,
103 |                          stderr=subprocess.PIPE)
104 |     output, error = p.communicate()
105 | 
106 | 
107 | 
108 |     print("Writing data to " + output_file + ".")
109 | 
110 | 
111 | def speed(input_file, coefficient=1.25):
112 |     """
113 |     Speed up or shrink a wave.
114 | 
115 |     Args:
116 |         input_file (str) : Input filename.
117 |         coefficient (float) : coefficient characterizing the speeding degree.
118 |     """
119 |     # set-up variables for paths and file names
120 |     name_attribute = "_augmented_speeded.wav"
121 |     output_file = input_file.split(".wav")[0] + name_attribute
122 | 
123 |     # apply speeding command and wait for ffmpeg to finish
124 |     speeding_command = ["ffmpeg", "-i", input_file, "-filter:a",
125 |                         "atempo={0}".format(str(coefficient)),
126 |                         output_file]
127 |     _ = subprocess.Popen(speeding_command,
128 |                          stdin=subprocess.PIPE,
129 |                          stdout=subprocess.PIPE,
130 |                          stderr=subprocess.PIPE).communicate()
131 |     print("Writing data to " + output_file + ".")
132 | 
133 | 
134 | def shift_time(infile, tshift, direction):
135 |     """
136 |     Augment audio data by shifting the time in the file. Signal can be shifted
137 |     to the left or right.
138 | 
139 |     Note:
140 |         Time shifting simply moves the audio left or right by tshift seconds.
141 |         The implementation uses `np.roll`, so the samples shifted past one end
142 |         of the signal wrap around and re-enter at the other end, rather than being replaced by silence.
143 | 
144 |     Args:
145 |         infile (str) : Input filename.
146 |         tshift (int) : Signal time shift in seconds.
147 |         direction (str) : shift direction (to the left or right).
148 |     """
149 |     fs, sig = read_file(filename=infile)
150 |     shift = int(tshift * fs) * int(direction == "left") - \
151 |             int(tshift * fs) * int(direction == "right")
152 | 
153 |     # shift time (np.roll wraps the shifted samples around)
154 |     augmented_sig = np.roll(sig, shift)
155 | 
156 |     # construct file names
157 |     output_file_path = os.path.dirname(infile)
158 |     name_attribute = "_augmented_%s_%s_shifted.wav" % (direction, tshift)
159 | 
160 |     # export data to file
161 |     write_file(output_file_path=output_file_path,
162 |                input_file_name=infile,
163 |                name_attribute=name_attribute,
164 |                sig=augmented_sig,
165 |                fs=fs)
166 | 
167 | 
168 | def reverse(infile):
169 |     """
170 |     Reverses the input signal to play from the end to the beginning, and writes it
171 |     to an output file.
172 | 
173 |     Args:
174 |         infile (str): Input filename.
175 |     """
176 |     fs, sig = read_file(filename=infile)
177 |     augmented_sig = sig[::-1]
178 | 
179 |     # construct file names
180 |     output_file_path = os.path.dirname(infile)
181 |     name_attribute = "_augmented_reversed.wav"
182 | 
183 |     # export data to file
184 |     write_file(output_file_path=output_file_path,
185 |                input_file_name=infile,
186 |                name_attribute=name_attribute,
187 |                sig=augmented_sig,
188 |                fs=fs)
189 | 
190 | 
191 | 
192 | def resample_audio(infile, sr):
193 |     """
194 |     Resample the signal according to a new input sampling rate, with respect to the
195 |     Nyquist-Shannon theorem.
196 | 
197 |     Args:
198 |         infile (str) : input filename/path.
199 |         sr (int) : new sampling rate.
200 |     """
201 |     # set-up variables for paths and file names
202 |     output_file = "{0}_augmented_resampled_to_{1}.wav".format(infile.split(".wav")[0],
203 |                                                               sr)
204 | 
205 |     # apply resampling command
206 |     sampling_command = ["ffmpeg", "-i", infile, "-ar", str(sr), output_file]
207 | 
208 |     _ = subprocess.Popen(sampling_command,
209 |                          stdout=subprocess.PIPE,
210 |                          stderr=subprocess.PIPE)
211 | 
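To make the wrap-around behaviour of `shift_time` concrete, a tiny demonstration of what `np.roll` does to a signal:

```python
import numpy as np

sig = np.array([1, 2, 3, 4, 5])
print(np.roll(sig, 2))    # [4 5 1 2 3] -> trailing samples wrap to the front
print(np.roll(sig, -2))   # [3 4 5 1 2] -> leading samples wrap to the back
```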
--------------------------------------------------------------------------------
/pydiogment/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/pydiogment/utils/__init__.py
--------------------------------------------------------------------------------
/pydiogment/utils/filters.py:
--------------------------------------------------------------------------------
1 | """
2 | - Description: implements the Scipy-based Butterworth filters.
3 | - bandpass: https://stackoverflow.com/questions/12093594/how-to-implement-band-pass-butterworth-filter-with-scipy-signal-butter
4 | - highpass: https://stackoverflow.com/questions/39032325/python-high-pass-filter
5 | """
6 | from scipy.signal import butter, lfilter
7 | 
8 | 
9 | def butter_lowpass(cutoff, fs, order=5):
10 |     """
11 |     Design a lowpass filter.
12 | 
13 |     Args:
14 |         cutoff (float) : the cutoff frequency of the filter.
15 |         fs (float) : the sampling rate.
16 |         order (int) : order of the filter, by default defined to 5.
17 |     """
18 |     # calculate the Nyquist frequency
19 |     nyq = 0.5 * fs
20 | 
21 |     # design filter
22 |     low = cutoff / nyq
23 |     b, a = butter(order, low, btype='low', analog=False)
24 | 
25 |     # return the filter coefficients: numerator and denominator
26 |     return b, a
27 | 
28 | 
29 | def butter_highpass(cutoff, fs, order=5):
30 |     """
31 |     Design a highpass filter.
32 | 
33 |     Args:
34 |         cutoff (float) : the cutoff frequency of the filter.
35 |         fs (float) : the sampling rate.
36 |         order (int) : order of the filter, by default defined to 5.
37 |     """
38 |     # calculate the Nyquist frequency
39 |     nyq = 0.5 * fs
40 | 
41 |     # design filter
42 |     high = cutoff / nyq
43 |     b, a = butter(order, high, btype='high', analog=False)
44 | 
45 |     # return the filter coefficients: numerator and denominator
46 |     return b, a
47 | 
48 | 
49 | def butter_bandpass(low_cut, high_cut, fs, order=5):
50 |     """
51 |     Design a band pass filter.
52 | 
53 |     Args:
54 |         low_cut (float) : the low cutoff frequency of the filter.
55 |         high_cut (float) : the high cutoff frequency of the filter.
56 |         fs (float) : the sampling rate.
57 |         order (int) : order of the filter, by default defined to 5.
58 |     """
59 |     # calculate the Nyquist frequency
60 |     nyq = 0.5 * fs
61 | 
62 |     # design filter
63 |     low = low_cut / nyq
64 |     high = high_cut / nyq
65 |     b, a = butter(order, [low, high], btype='band')
66 | 
67 |     # return the filter coefficients: numerator and denominator
68 |     return b, a
69 | 
70 | 
71 | def butter_filter(sig, fs, ftype="low", low_cut=50, high_cut=2000, order=5):
72 |     """
73 |     Apply a filter to the signal.
74 | 
75 |     Args:
76 |         sig (array) : the signal array to filter.
77 |         fs (float) : the sampling rate.
78 |         ftype (str) : the filter type, by default defined to a low pass filter.
79 |         low_cut (float) : the low cutoff frequency, by default defined to 50 Hz.
80 |         high_cut (float) : the high cutoff frequency, by default defined to 2000 Hz.
81 |         order (int) : order of the filter, by default defined to 5.
82 | 
83 |     Returns:
84 |         array of the filtered signal.
85 |     """
86 |     if ftype == "band"   : b, a = butter_bandpass(low_cut, high_cut, fs, order)  # keep low_cut < f < high_cut
87 |     elif ftype == "high" : b, a = butter_highpass(high_cut, fs, order)           # keep f > high_cut
88 |     else                 : b, a = butter_lowpass(low_cut, fs, order)             # keep f < low_cut
89 | 
90 |     # filter signal
91 |     y = lfilter(b, a, sig)
92 |     return y
--------------------------------------------------------------------------------
/pydiogment/utils/io.py:
--------------------------------------------------------------------------------
1 | """
2 | - Description: write and read module for wave data.
3 | """
4 | import os
5 | from scipy.io.wavfile import read, write
6 | 
7 | 
8 | def read_file(filename):
9 |     """
10 |     Read a wave file as mono.
11 | 
12 |     Args:
13 |         filename (str) : wave file / path.
14 | 
15 |     Returns:
16 |         tuple of sampling rate and audio data.
17 |     """
18 |     fs, sig = read(filename=filename)
19 |     if (sig.ndim == 1):
20 |         samples = sig
21 |     else:
22 |         samples = sig[:, 0]  # keep only the first channel
23 |     return fs, samples
24 | 
25 | 
26 | 
27 | def write_file(output_file_path, input_file_name, name_attribute, sig, fs):
28 |     """
29 |     Write a signal to a wave file.
30 | 
31 |     Args:
32 |         output_file_path (str) : path to save the resulting wave file to.
33 |         input_file_name (str) : name of the processed wave file.
34 |         name_attribute (str) : attribute to add to the output file name.
35 |         sig (array) : signal/audio array.
36 |         fs (int) : sampling rate.
37 | 
38 | 
39 | """ 40 | # set-up the output file name 41 | fname = os.path.basename(input_file_name).split(".wav")[0] + name_attribute 42 | fpath = os.path.join(output_file_path, fname) 43 | write(filename=fpath, rate=fs, data=sig) 44 | print("Writing data to " + fpath + ".") 45 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.17.2 2 | scipy==1.3.1 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [egg_info] 2 | #tag_build = dev 3 | #tag_date = true 4 | 5 | [pycodestyle] 6 | max-line-length=119 7 | 8 | [aliases] 9 | release = egg_info -RDb '' register bdist_egg sdist upload 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pathlib 3 | from setuptools import setup, find_packages 4 | 5 | # The directory containing this file 6 | path = pathlib.Path(__file__).parent 7 | 8 | # get readme text 9 | readme = (path / "README.md").read_text() 10 | # define requirements 11 | requires = ["numpy>=1.17.2", "scipy>=1.3.1"] 12 | 13 | setup ( 14 | name = 'pydiogment', 15 | version = '0.0.3', 16 | author = 'SuperKogito, HMMalek', 17 | author_email = 'superkogito@gmail.com, hasna.m.malek@gmail.com', 18 | description = 'Python audio augmentation', 19 | long_description = readme, 20 | long_description_content_type = "text/markdown", 21 | license = 'BSD', 22 | url = 'https://github.com/SuperKogito/pydiogment', 23 | packages = find_packages(), 24 | classifiers = [ 25 | 'Development Status :: 3 - Alpha', 26 | 'Environment :: Console', 27 | 'Environment :: Web Environment', 28 | 'Intended Audience :: Developers', 29 | 'License :: OSI Approved :: BSD License', 30 | 'Operating System :: OS Independent', 31 | 'Programming Language :: Python', 32 | 'Topic :: Documentation', 33 | 'Topic :: Utilities', 34 | ], 35 | platforms = 'any', 36 | include_package_data = True, 37 | install_requires = requires, 38 | python_requires = '>=3.5', 39 | ) 40 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_auga.py: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ############################# tests for auga ################################### 3 | ################################################################################ 4 | import time 5 | import pytest 6 | from tests.test_utils import assert_file_exists 7 | from pydiogment.auga import apply_gain, add_noise, fade_in_and_out, normalize 8 | 9 | 10 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav']) 11 | @pytest.mark.parametrize('gain', [-100, -50, -25]) 12 | def test_apply_gain(test_file, gain): 13 | """ 14 | Test apply gain function. 
15 | """ 16 | apply_gain(infile=test_file, gain=gain) 17 | 18 | # check result 19 | fname = "%s_augmented_with_%s_gain.wav" % (test_file.split(".wav")[0], str(gain)) 20 | time.sleep(1) 21 | assert_file_exists(fname) 22 | 23 | 24 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav']) 25 | @pytest.mark.parametrize('snr', [-3, -6, -20, -50, -100]) 26 | def test_add_noise(test_file, snr): 27 | """ 28 | Test adding noise function. 29 | """ 30 | add_noise(test_file, snr) 31 | 32 | # check result 33 | fname = "%s_augmented_%s_noisy.wav" % (test_file.split(".wav")[0], str(snr)) 34 | time.sleep(1) 35 | assert_file_exists(fname) 36 | 37 | 38 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav']) 39 | def test_fade_in_and_out(test_file): 40 | """ 41 | Test function for adding a fade in and fade out effect. 42 | """ 43 | fade_in_and_out(test_file) 44 | 45 | # check result 46 | fname = "%s_augmented_fade_in_out.wav" % (test_file.split(".wav")[0]) 47 | time.sleep(1) 48 | assert_file_exists(fname) 49 | 50 | 51 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav']) 52 | @pytest.mark.parametrize('normalization_technique', ['peak', 'rms']) 53 | @pytest.mark.parametrize('rms_level', [-6, -3, 0, 3, 6]) 54 | def test_normalize(test_file, normalization_technique, rms_level): 55 | """ 56 | Test function for the normalization function. 57 | """ 58 | normalize(test_file, normalization_technique, rms_level) 59 | 60 | # check result 61 | fname = "{0}_augmented_{1}_normalized.wav".format(test_file.split(".wav")[0], 62 | normalization_technique) 63 | time.sleep(1) 64 | assert_file_exists(fname) 65 | -------------------------------------------------------------------------------- /tests/test_augf.py: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ############################# tests for augf ################################### 3 | ################################################################################ 4 | import os 5 | import time 6 | import pytest 7 | from tests.test_utils import assert_file_exists 8 | from pydiogment.augf import convolve, change_tone, apply_filter 9 | 10 | 11 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav']) 12 | @pytest.mark.parametrize('ir_fname', ['tests/testfiles/tel_noise.wav']) 13 | @pytest.mark.parametrize('level', [0.5, 0.25, 0.01]) 14 | def test_convolve(test_file, ir_fname, level): 15 | """ 16 | Test the convolution function. 17 | """ 18 | # apply a convolution between the audio input file and a predefined file. 19 | convolve(infile=test_file, ir_fname=ir_fname, level=level) 20 | 21 | # check result 22 | fname = "{0}_augmented_{1}_convolved_with_level_{2}.wav".format(test_file.split(".wav")[0], 23 | os.path.basename(ir_fname.split(".")[0]), 24 | level) 25 | time.sleep(1) 26 | assert_file_exists(fname) 27 | 28 | 29 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav']) 30 | @pytest.mark.parametrize('tone', [0.9, 1.1]) 31 | def test_change_tone(test_file, tone): 32 | """ 33 | Test the tone changing function. 
34 |     """
35 |     # change audio file tone
36 |     change_tone(infile=test_file, tone=tone)
37 | 
38 |     # check result
39 |     fname = "%s_augmented_%s_toned.wav" % (test_file.split(".wav")[0], str(tone))
40 |     time.sleep(5)
41 |     assert_file_exists(fname)
42 | 
43 | 
44 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav'])
45 | @pytest.mark.parametrize('filter_type', ["low", "high", "band"])
46 | @pytest.mark.parametrize('low_cutoff_freq', [20, 30, 50, 100])
47 | @pytest.mark.parametrize('high_cutoff_freq', [500, 700, 1200, 1500])
48 | @pytest.mark.parametrize('order', [3, 5, 9])
49 | def test_apply_filter(test_file, filter_type, low_cutoff_freq, high_cutoff_freq, order):
50 |     """
51 |     Test the Butterworth filters.
52 |     """
53 |     # apply filter
54 |     apply_filter(test_file, filter_type, low_cutoff_freq, high_cutoff_freq, order)
55 | 
56 |     # check result
57 |     fname = "{0}_augmented_{1}_pass_filtered.wav".format(test_file.split(".wav")[0], filter_type)
58 |     time.sleep(3)
59 |     assert_file_exists(fname)
--------------------------------------------------------------------------------
/tests/test_augt.py:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | ############################# tests for augt ###################################
3 | ################################################################################
4 | import time
5 | import pytest
6 | from tests.test_utils import assert_file_exists
7 | from pydiogment.augt import (slow_down, speed, random_cropping, shift_time,
8 |                              resample_audio, eliminate_silence, reverse)
9 | 
10 | 
11 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav'])
12 | def test_eliminate_silence(test_file):
13 |     """
14 |     Test function for the silence removal.
15 |     """
16 |     eliminate_silence(test_file)
17 | 
18 |     # check result
19 |     fname = test_file.split(".wav")[0] + "_augmented_without_silence.wav"
20 |     time.sleep(1)
21 |     assert_file_exists(fname)
22 | 
23 | 
24 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav'])
25 | @pytest.mark.parametrize('coefficient', [0.5, 0.8])
26 | def test_slow_down(test_file, coefficient):
27 |     slow_down(test_file, coefficient=coefficient)
28 | 
29 |     # check result
30 |     fname = "%s_augmented_slowed.wav" % (test_file.split(".wav")[0])
31 |     time.sleep(1)
32 |     assert_file_exists(fname)
33 | 
34 | 
35 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav'])
36 | @pytest.mark.parametrize('coefficient', [1.2, 1.5])
37 | def test_speed(test_file, coefficient):
38 |     speed(test_file, coefficient=coefficient)
39 | 
40 |     # check result
41 |     fname = "%s_augmented_speeded.wav" % (test_file.split(".wav")[0])
42 |     time.sleep(1)
43 |     assert_file_exists(fname)
44 | 
45 | 
46 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav'])
47 | @pytest.mark.parametrize('min_len', [1])
48 | def test_random_cropping(test_file, min_len):
49 |     random_cropping(test_file, min_len)
50 | 
51 |     # check result
52 |     fname = "%s_augmented_randomly_cropped_%s.wav" % (test_file.split(".wav")[0], str(min_len))
53 |     assert_file_exists(fname)
54 | 
55 | 
56 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav'])
57 | @pytest.mark.parametrize('tshift', [1])
58 | @pytest.mark.parametrize('direction', ["left", "right"])
59 | def test_shift_time(test_file, tshift, direction):
60 |     shift_time(test_file, tshift, direction)
61 | 
62 |     # check result
63 |     fname = "%s_augmented_%s_%s_shifted.wav" % (test_file.split(".wav")[0], direction, tshift)
64 |     assert_file_exists(fname)
65 | 
66 | 
67 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav'])
68 | def test_reverse(test_file):
69 |     """
70 |     Test function for the reversing function.
71 |     """
72 |     reverse(test_file)
73 | 
74 |     # check result
75 |     fname = "{0}_augmented_reversed.wav".format(test_file.split(".wav")[0])
76 |     time.sleep(1)
77 |     assert_file_exists(fname)
78 | 
79 | 
80 | @pytest.mark.parametrize('test_file', ['tests/testfiles/test.wav'])
81 | @pytest.mark.parametrize('sr', [4000, 6000, 9000, 16000])
82 | def test_resample_audio(test_file, sr):
83 |     """
84 |     Test function for the resampling function.
85 |     """
86 |     resample_audio(test_file, sr)
87 | 
88 |     # check result
89 |     fname = "{0}_augmented_resampled_to_{1}.wav".format(test_file.split(".wav")[0],
90 |                                                         sr)
91 |     time.sleep(1)
92 |     assert_file_exists(fname)
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | ####################### tests for utils.filters ################################
3 | ################################################################################
4 | import os
5 | import pytest
6 | import numpy as np
7 | from pydiogment.utils.filters import butter_filter
8 | 
9 | 
10 | def assert_file_exists(fname):
11 |     """
12 |     Raise AssertionError if file does not exist.
13 |     """
14 |     if not (os.path.isfile(fname)):
15 |         raise AssertionError
16 | 
17 | 
18 | @pytest.mark.parametrize('fs', [8000.0])
19 | @pytest.mark.parametrize('low_cut', [50.0, 150.0, 300.0])
20 | @pytest.mark.parametrize('high_cut', [1000.0, 2000.0, 3000.0])
21 | @pytest.mark.parametrize('filter_type', ["low", "high", "band"])
22 | @pytest.mark.parametrize('order', [3, 5, 6, 9])
23 | def test_filters(fs, low_cut, high_cut, filter_type, order):
24 |     """
25 |     Test function for low, high and bandpass filters.
26 |     """
27 |     # define input noisy signal.
28 |     T = 0.05
29 |     nsamples = T * fs
30 |     t = np.linspace(0, T, int(nsamples), endpoint=False)  # time vector over T seconds
31 |     a = 0.02
32 |     f0 = 600.0
33 |     x = 0.1 * np.sin(2 * np.pi * 1.2 * np.sqrt(t))
34 |     x += 0.01 * np.cos(2 * np.pi * 312 * t + 0.1)
35 |     x += a * np.cos(2 * np.pi * f0 * t + .11)
36 |     x += 0.03 * np.cos(2 * np.pi * 2000 * t)
37 | 
38 |     try:
39 |         # filter signal
40 |         _ = butter_filter(x, fs, filter_type, low_cut, high_cut, order)
41 | 
42 |     except Exception as e:
43 |         print(e)
44 |         raise
--------------------------------------------------------------------------------
/tests/testfiles/tel_noise.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/tel_noise.wav
--------------------------------------------------------------------------------
/tests/testfiles/test.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_-100_noisy.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_-100_noisy.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_-20_noisy.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_-20_noisy.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_-3_noisy.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_-3_noisy.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_-50_noisy.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_-50_noisy.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_-6_noisy.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_-6_noisy.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_0.9_pass_filtered.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_0.9_pass_filtered.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_0.9_toned.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_0.9_toned.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_1.1_pass_filtered.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_1.1_pass_filtered.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_1.1_toned.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_1.1_toned.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_fade_in_out.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_fade_in_out.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_left_1_shifted.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_left_1_shifted.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_peak_normalized.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_peak_normalized.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_randomly_cropped_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_randomly_cropped_1.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_resampled_to_16000.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_resampled_to_16000.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_resampled_to_4000.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_resampled_to_4000.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_resampled_to_6000.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_resampled_to_6000.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_resampled_to_9000.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_resampled_to_9000.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_reversed.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_reversed.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_right_1_shifted.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_right_1_shifted.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_rms_normalized.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_rms_normalized.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_slowed.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_slowed.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_speeded.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_speeded.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_tel_noise_convolved_with_level_0.01.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_tel_noise_convolved_with_level_0.01.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_tel_noise_convolved_with_level_0.25.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_tel_noise_convolved_with_level_0.25.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_tel_noise_convolved_with_level_0.5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_tel_noise_convolved_with_level_0.5.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_with_-100_gain.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_with_-100_gain.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_with_-25_gain.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_with_-25_gain.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_with_-50_gain.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_with_-50_gain.wav
--------------------------------------------------------------------------------
/tests/testfiles/test_augmented_without_silence.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SuperKogito/pydiogment/074543dc9483b450653f8a00c8279bf1eb873199/tests/testfiles/test_augmented_without_silence.wav
--------------------------------------------------------------------------------
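
Note: the fixture files above are the expected outputs of the augmentation calls exercised in tests/test_augt.py. As a minimal sketch of that relationship — not a file in the repository, and assuming pydiogment is importable from the repository root — regenerating and checking one fixture by hand looks like this:

# illustrative only: regenerate one augmented fixture and verify it the same
# way tests/test_augt.py does.
import os
import time

from pydiogment.augt import slow_down

test_file = "tests/testfiles/test.wav"

# slow_down writes its output next to the input as <stem>_augmented_slowed.wav;
# this is the naming convention the tests assert on.
slow_down(test_file, coefficient=0.8)

# the tests sleep briefly before checking, presumably to give the spawned
# audio-processing step time to finish writing the file.
time.sleep(1)

expected = test_file.split(".wav")[0] + "_augmented_slowed.wav"
assert os.path.isfile(expected), expected

Running `python -m pytest tests/` from the repository root runs the full suite, which produces or overwrites these fixtures as a side effect.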