├── .gitignore ├── Copying.docx ├── Dockerfile ├── GNU_AGPL_full.docx ├── LICENSE.md ├── README.md ├── THIRD-PARTY-NOTICES.docx ├── TODO.md ├── audiodata ├── background_samples │ ├── README.md │ ├── cafe.wav │ ├── fridge.wav │ └── traffic.wav ├── car_horn.wav ├── models │ └── denoiser │ │ ├── example_denoiser_stft.h5 │ │ ├── log.csv │ │ └── log_extraction_settings.csv └── python.wav ├── build_aju_image.sh ├── doc_requirements.txt ├── docs └── source │ ├── 0.1.0a2 │ ├── augment.rst │ ├── builtin_sp.rst │ ├── builtin_spdl.rst │ ├── changelog.rst │ ├── conf.py │ ├── datasets.rst │ ├── dsp.rst │ ├── example_cases.rst │ ├── examples │ │ ├── README.txt │ │ ├── plot_SNR_add_noise_to_datasets.py │ │ ├── plot_augment_sound.py │ │ ├── plot_dataset_info_formatting.py │ │ ├── plot_featureprep_denoiser.py │ │ ├── plot_featureprep_envclassifier.py │ │ ├── plot_filter_out_noise.py │ │ ├── plot_implement_denoiser.py │ │ ├── plot_signals_and_features.py │ │ ├── plot_train_classifier.py │ │ ├── plot_train_denoiser.py │ │ └── plot_vad_snr_filter.py │ ├── exceptions.rst │ ├── feats.rst │ ├── files.rst │ ├── filters.rst │ ├── index.rst │ ├── model_dataprep.rst │ ├── modelsetup.rst │ ├── modules.rst │ ├── readme.rst │ ├── template_models.rst │ └── utils.rst │ ├── 0.1.0a3 │ ├── augment.rst │ ├── builtin_sp.rst │ ├── builtin_spdl.rst │ ├── changelog.rst │ ├── conf.py │ ├── datasets.rst │ ├── dsp.rst │ ├── example_cases.rst │ ├── examples │ │ ├── README.txt │ │ ├── plot_SNR_add_noise_to_datasets.py │ │ ├── plot_augment_sound.py │ │ ├── plot_dataset_info_formatting.py │ │ ├── plot_extract_augment_train_classifier.py │ │ ├── plot_featureprep_denoiser.py │ │ ├── plot_featureprep_envclassifier.py │ │ ├── plot_filter_out_noise.py │ │ ├── plot_implement_denoiser.py │ │ ├── plot_signals_and_features.py │ │ ├── plot_train_classifier.py │ │ ├── plot_train_denoiser.py │ │ └── plot_vad_snr_filter.py │ ├── exceptions.rst │ ├── feats.rst │ ├── files.rst │ ├── filters.rst │ ├── index.rst │ ├── model_dataprep.rst │ ├── modelsetup.rst │ ├── modules.rst │ ├── readme.rst │ ├── template_models.rst │ ├── utils.rst │ └── versions.rst │ ├── augment.rst │ ├── builtin_sp.rst │ ├── builtin_spdl.rst │ ├── changelog.rst │ ├── conf.py │ ├── datasets.rst │ ├── dsp.rst │ ├── example_cases.rst │ ├── examples │ ├── README.txt │ ├── plot_SNR_add_noise_to_datasets.py │ ├── plot_augment_sound.py │ ├── plot_dataset_info_formatting.py │ ├── plot_extract_augment_train_classifier.py │ ├── plot_featureprep_denoiser.py │ ├── plot_featureprep_envclassifier.py │ ├── plot_filter_out_noise.py │ ├── plot_implement_denoiser.py │ ├── plot_signals_and_features.py │ ├── plot_train_classifier.py │ ├── plot_train_denoiser.py │ └── plot_vad_snr_filter.py │ ├── exceptions.rst │ ├── feats.rst │ ├── files.rst │ ├── filters.rst │ ├── index.rst │ ├── model_dataprep.rst │ ├── modelsetup.rst │ ├── modules.rst │ ├── readme.rst │ ├── template_models.rst │ ├── utils.rst │ └── versions.rst ├── jupyter_notebooks ├── augment_sound_machine_learning.ipynb ├── filter_out_noise.ipynb ├── generate_signals_noise_snr.ipynb ├── implement_denoiser.ipynb ├── plot_vad_snr_filter.ipynb └── speech_noise_SNR.ipynb ├── new_version_updates.md ├── requirements.txt ├── setup.py ├── soundpy ├── __init__.py ├── __init__.pyc ├── augment.py ├── builtin.py ├── datasets.py ├── dsp.py ├── exceptions.py ├── feats.py ├── files.py ├── filters.py ├── models │ ├── __init__.py │ ├── builtin.py │ ├── dataprep.py │ ├── modelsetup.py │ ├── plot.py │ └── template_models.py ├── utils.py └── utils.pyc ├── start_jup_env.sh ├── tests ├── datasets_test.py ├── dsp_test.py ├── feats_test.py ├── filters_test.py ├── inspect_functions.py └── utils_test.py └── tests_requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | env/ 2 | ve/ 3 | __pycache__/ 4 | saved_features_and_models/ 5 | audiodata/ 6 | images/ 7 | images_1/ 8 | audiodata2/ 9 | audiodata3/ 10 | .ipynb_checkpoints/ 11 | env2/ 12 | env3/ 13 | docs/build/ 14 | docs/doc_layout.md 15 | docs/Makefile 16 | docs/make.bat 17 | docs/source/auto_examples/ 18 | example_dir/ 19 | tests/testing_pypi/ 20 | test_audio/ 21 | compare_augmentations_right/ 22 | compare_augmentations_nine/ 23 | build/ 24 | *.npy 25 | dev_env/ 26 | docs/source/examples/example_feats_models/ 27 | *.png 28 | example_feats_models/ 29 | update_env/ 30 | debug_env/ 31 | p3_test/ 32 | 33 | -------------------------------------------------------------------------------- /Copying.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/Copying.docx -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.1.0-gpu-py3 2 | 3 | RUN apt update && apt upgrade -y 4 | 5 | RUN apt-get install -y libsndfile1 6 | 7 | RUN python -m pip install --upgrade pip 8 | 9 | RUN pip install -U soundfile \ 10 | librosa \ 11 | python_speech_features \ 12 | notebook \ 13 | matplotlib 14 | 15 | RUN pip install -U scikit-image 16 | 17 | RUN mkdir /root/soundpy/ 18 | 19 | WORKDIR /root/soundpy/ 20 | -------------------------------------------------------------------------------- /GNU_AGPL_full.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/GNU_AGPL_full.docx -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ## AGPL-3.0 License 2 | 3 | Copyright (c) 2020, Aislyn Rose. 4 | 5 | Permission to use, copy, modify, and/or distribute this software 6 | under the terms of the GNU General Public License as published by the 7 | Free Software Foundation, either version 3 of the License, or (at your option) 8 | any later version. 9 | 10 | The SoundPy framework is distributed in the hope that it will be useful, but 11 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 13 | details. 14 | -------------------------------------------------------------------------------- /THIRD-PARTY-NOTICES.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/THIRD-PARTY-NOTICES.docx -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | 2 | ## Current 3 | - make it easier to use / build different models 4 | - implement autoencoder model 5 | - implement denoising with autoencoder model 6 | - build autoencoder in keras 7 | - build autoencoder in pytorch 8 | - build via docker image 9 | 10 | ## Functionality 11 | 12 | - autoencoder training 13 | - get postfilter to work on spectral subtraction 14 | - set power_scale default to 'power_to_db'? 15 | - functions to use librosa or not to perform tasks (librosa doesn't work on notebooks.ai for example) 16 | - measure level of snr 17 | - measure quality of filtering/speech enhancement 18 | - measure signal similarity 19 | - source separation 20 | - gender switch 21 | - text to speech 22 | - speech to text 23 | - dataset exploration (visualize 10 random samples/ based on size?, etc.) 24 | - simple inclusion of noise reduction into training models 25 | - pysoundtool and pysoundtool.online version? (use librosa vs no librosa) 26 | 27 | ## Presentation 28 | 29 | - blog post on each set of functionalities 30 | - presentation of examples 31 | - get documentation online 32 | - simplify functions 33 | - improve documentation (references, examples, testing, data shapes!!, help options) 34 | 35 | ## Testing 36 | 37 | - expand test cases 38 | - efficiency of code 39 | 40 | ## Organization 41 | 42 | - reorganize based on use... how import statement should work 43 | - make sample_rate, samprate, samplingrate, sr namespace consistent 44 | - make features/feature_type namespace consistent 45 | - use keyword arguments for librosa and scipy? 46 | - simplify 47 | 48 | 49 | ## Organization ideas: 50 | 51 | pyst.loadsound(audiofile, sr) 52 | pyst.playsound(audiofile, sr)? 53 | pyst.plotsound(audiofile, sr, feature_type) 54 | 55 | pyst.data.train_val_test(input_data, output_data) 56 | pyst.data.analyze(audo_dir)? For example for audio types, lengths?, sizes? etc. Useful for logging? 57 | pyst.feats.plot() 58 | pyst.feats.hear() 59 | pyst.feats.extract() 60 | model = pyst.models.speechrec_simple() # model will be a class instance.. 61 | history = pyst.models.train(model, train_path, val_path) 62 | matplotplib.pyplot.plot(history) ? 63 | pyst.models.plot(history) 64 | pyst.models.run(model, test_path) 65 | 66 | pyst.filters.wiener() 67 | pyst.filters.bandsubtraction() 68 | pyst.models.soundclassifier() 69 | pyst.models.autoencoder_denoise() 70 | pyst.models.speechrec() 71 | -------------------------------------------------------------------------------- /audiodata/background_samples/README.md: -------------------------------------------------------------------------------- 1 | ## Background Noise Examples 2 | 3 | These sounds were downloaded from freesound.org and are licensed under the Creative Commons 0 License. 4 | 5 | They have been limited to 10 seconds and the sample rate reduced to 16Hz to reduce their sizes. 6 | 7 | ### cafe.wav 8 | 9 | 387030__antonybk__cafe-takk-northern-quarter-manchester.wav 10 | 11 | ### traffic.wav 12 | 13 | 261344__ivolipa__city-traffic-day.wav 14 | 15 | ### fridge.wav 16 | 17 | 237399__squareal__fridge-tone.wav 18 | -------------------------------------------------------------------------------- /audiodata/background_samples/cafe.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/background_samples/cafe.wav -------------------------------------------------------------------------------- /audiodata/background_samples/fridge.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/background_samples/fridge.wav -------------------------------------------------------------------------------- /audiodata/background_samples/traffic.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/background_samples/traffic.wav -------------------------------------------------------------------------------- /audiodata/car_horn.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/car_horn.wav -------------------------------------------------------------------------------- /audiodata/models/denoiser/example_denoiser_stft.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/models/denoiser/example_denoiser_stft.h5 -------------------------------------------------------------------------------- /audiodata/models/denoiser/log_extraction_settings.csv: -------------------------------------------------------------------------------- 1 | dur_sec,3 2 | feature_type,stft noisy 3 | feat_type,stft 4 | complex_vals,True 5 | sr,22050 6 | num_feats,177 7 | n_fft,352 8 | win_size_ms,16 9 | frame_length,352 10 | percent_overlap,0.5 11 | window,hann 12 | frames_per_sample,11 13 | labeled_data,False 14 | visualize,True 15 | input_shape,"(35, 11, 177)" 16 | desired_shape,"(385, 177)" 17 | use_librosa,True 18 | center,True 19 | mode,reflect 20 | subsection_data,True 21 | divide_factor,10 22 | -------------------------------------------------------------------------------- /audiodata/python.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/python.wav -------------------------------------------------------------------------------- /build_aju_image.sh: -------------------------------------------------------------------------------- 1 | # chmod u+x build_aju_image.sh 2 | 3 | docker build . -t aju 4 | -------------------------------------------------------------------------------- /doc_requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-rtd-theme 2 | sphinx-gallery 3 | numpydoc 4 | pillow 5 | ipython 6 | pandas 7 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/augment.rst: -------------------------------------------------------------------------------- 1 | 2 | Augment audio data 3 | ------------------ 4 | 5 | .. automodule:: soundpy.augment 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/builtin_sp.rst: -------------------------------------------------------------------------------- 1 | 2 | Built-In Functionality (non Deep Learning) 3 | ------------------------------------------ 4 | 5 | .. automodule:: soundpy.builtin 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/builtin_spdl.rst: -------------------------------------------------------------------------------- 1 | 2 | Built-In Functionality (Deep Learning) 3 | -------------------------------------- 4 | 5 | .. automodule:: soundpy.models.builtin 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/changelog.rst: -------------------------------------------------------------------------------- 1 | ********* 2 | Changelog 3 | ********* 4 | 5 | v0.1.0a 6 | ======= 7 | 8 | v0.1.0a2 9 | -------- 10 | 2020-08-13 11 | 12 | 13 | Bug fixes 14 | - added `use_beg_ms` parameter in `soundpy.dsp.vad`: improved VAD recognition of silences post speech. 15 | 16 | Features 17 | - added GPU option: provide instructions and Docker image for running SoundPy with GPU 18 | - added `extend_window_ms` paremeter to `soundpy.feats.get_vad_samples` and `soundpy.feats.get_vad_stft`: can extend VAD window if desired. Useful in higher SNR environments. 19 | - added `soundpy.feats.get_samples_clipped` and `soundpy.feats.get_stft_clipped` to clip off beginning and ending silences. 20 | - added `beg_end_clipped` parameter to `soundpy.feats.plot_vad` to visualize VAD by clipping the beginning and ending silences (if True) or VAD instances throughout the signal (if False). 21 | - added `soundpy.models.dataprep.GeneratorFeatExtraction` class for extracting and augmenting features during training (still experimental). 22 | - added `soundpy.models.builtin.envclassifier_extract_train` as an example of extracting and augmenting features during training (still experimental). 23 | - added `soundpy.dsp.clip_at_zero` to enable smoother concatenations of signals and enables removal of clicks at beginning and ending of signals. 24 | - added `soundpy.dsp.remove_dc_bias` to enable smoother concatenations of signals 25 | - added and set `remove_dc` parameter to True in `soundpy.files.loadsound` and `soundpy.files.savesound` to ensure signals all have mean zero. 26 | - added `mirror_sound` option to `soundpy.dsp.apply_sample_length` as a way to extend sound. 27 | - added `soundpy.dsp.ismono` to check if samples were mono or stereo. 28 | - added `soundpy.dsp.average_channels` to average sample amplitudes across channels, e.g. to identify where high energy begins / ends in the signal without disregarding additional channels (if stereo sound). 29 | - added `soundpy.dsp.add_channels` for adding additional channels if needed (e.g. for applying a 'hann' or 'hamming' window to stereo sound) 30 | - added stereo sound functionality to `soundpy.dsp.add_backgroundsound`, `soundpy.dsp.clip_at_zero`, `soundpy.dsp.calc_fft`, `soundpy.feats.get_stft`, `soundpy.feats.get_vad_stft` 31 | 32 | 33 | Other changes 34 | - name change: from pysoundtool to soundpy: simpler 35 | - updated dependencies to newest versions still compatible with Tensorflow 2.1.0 36 | - moved `soundpy.dsp.get_vad_samples` to `soundpy.feats.get_vad_samples` 37 | - moved `soundpy.dsp.get_vad_stft` to `soundpy.feats.get_vad_stft` 38 | - name change: allow `soundpy.feats.normalize` to be used as `soundpy.normalize` 39 | - removed `pysoundtool_online` and mybinder button as maintaining the online version was not easily done. Aim to reimplement at some point. 40 | 41 | 42 | 43 | v0.1.0a1 44 | ======== 45 | 46 | Initial public alpha release. 47 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/datasets.rst: -------------------------------------------------------------------------------- 1 | 2 | Organizing datasets 3 | ------------------- 4 | 5 | .. automodule:: soundpy.datasets 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/dsp.rst: -------------------------------------------------------------------------------- 1 | 2 | Working with signals 3 | -------------------- 4 | 5 | .. automodule:: soundpy.dsp 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/example_cases.rst: -------------------------------------------------------------------------------- 1 | .. toctree:: 2 | :maxdepth: 2 3 | 4 | .. include:: auto_examples/index.rst 5 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/examples/README.txt: -------------------------------------------------------------------------------- 1 | 2 | ----------------------------- 3 | SoundPy Example Use Cases 4 | ----------------------------- 5 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/examples/plot_SNR_add_noise_to_datasets.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | """ 4 | ========================================== 5 | Add Noise to Speech at Specific SNR Levels 6 | ========================================== 7 | 8 | Add noise to speech at specific signal-to-noise ratio levels. 9 | 10 | To see how soundpy implements this, see `soundpy.dsp.add_backgroundsound`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | 17 | 18 | ##################################################################### 19 | # Let's import soundpy, and ipd for playing audio data 20 | import soundpy as sp 21 | import IPython.display as ipd 22 | 23 | 24 | ###################################################### 25 | # Define the speech and noise data samples 26 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 27 | 28 | ###################################################### 29 | # I will use speech and noise data from the soundpy repo. 30 | 31 | ########################################################## 32 | # Designate path relevant for accessing audiodata 33 | sp_dir = '../../../' 34 | 35 | ########################################################## 36 | # Speech sample: 37 | speech_sample = '{}audiodata/python.wav'.format(sp_dir) 38 | speech_sample = sp.utils.string2pathlib(speech_sample) 39 | # as pathlib object, can do the following: 40 | word = speech_sample.stem 41 | word 42 | 43 | ########################################################## 44 | # Noise sample: 45 | noise_sample = '{}audiodata/background_samples/cafe.wav'.format(sp_dir) 46 | noise_sample = sp.utils.string2pathlib(noise_sample) 47 | # as pathlib object, can do the following: 48 | noise = noise_sample.stem 49 | noise 50 | 51 | 52 | ########################################################## 53 | # Hear Clean Speech 54 | # ~~~~~~~~~~~~~~~~~ 55 | # I'm using a higher sample rate here as calculating SNR 56 | # performs best upwards of 44100 Hz. 57 | sr = 44100 58 | s, sr = sp.loadsound(speech_sample, sr = sr) 59 | ipd.Audio(s,rate=sr) 60 | 61 | 62 | ########################################################## 63 | # Hear Noise 64 | # ~~~~~~~~~~ 65 | n, sr = sp.loadsound(noise_sample, sr = sr) 66 | ipd.Audio(n,rate=sr) 67 | 68 | 69 | ########################################################## 70 | # Hear Signal-to-Noise Ratio 20 71 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 72 | noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound( 73 | speech_sample, 74 | noise_sample, 75 | sr = sr, 76 | snr = 20) 77 | ipd.Audio(noisyspeech_20snr,rate=sr) 78 | 79 | ######################################################## 80 | # `snr20` is simply the measured SNR post adjustment fo the noise signal. 81 | # This is useful to check that the indicated snr is at least close 82 | # to the resulting snr. 83 | snr20 84 | 85 | ########################################################## 86 | # Hear Signal-to-Noise Ratio 5 87 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 88 | noisyspeech_5snr, snr5 = sp.dsp.add_backgroundsound( 89 | speech_sample, 90 | noise_sample, 91 | sr = sr, 92 | snr = 5) 93 | ipd.Audio(noisyspeech_5snr,rate=sr) 94 | 95 | ######################################################### 96 | snr5 97 | 98 | ###################################################################### 99 | # Visualize the Audio Samples 100 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^ 101 | 102 | ###################################################################### 103 | # See Clean Speech (raw signal) 104 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 105 | sp.plotsound(speech_sample, feature_type='signal', 106 | sr = sr, title = 'Speech: ' + word.upper()) 107 | 108 | ###################################################################### 109 | # See Clean Speech (stft) 110 | # ~~~~~~~~~~~~~~~~~~~~~~~ 111 | sp.plotsound(speech_sample, feature_type='stft', 112 | sr = sr, title = 'Speech: ' + word.upper()) 113 | 114 | ###################################################################### See Noise (raw signal) 115 | # ~~~~~~~~~~~~~~~~~~~~~~ 116 | sp.plotsound(noise_sample, feature_type='signal', 117 | title = 'Noise: ' + noise.upper()) 118 | 119 | ###################################################################### See Noise (stft) 120 | # ~~~~~~~~~~~~~~~~ 121 | sp.plotsound(noise_sample, feature_type='stft', 122 | title = 'Noise: ' + noise.upper()) 123 | 124 | ###################################################################### 125 | # See Noisy Speech: SNR 20 (raw signal) 126 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 127 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal', 128 | title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper())) 129 | 130 | ###################################################################### 131 | # See Noisy Speech: SNR 20 (stft) 132 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 133 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'stft', 134 | title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper())) 135 | 136 | ###################################################################### 137 | # See Noisy Speech: SNR 5 (raw signal) 138 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 139 | sp.plotsound(noisyspeech_5snr, sr = sr, feature_type = 'signal', 140 | title = '"{}" with {} noise at SNR 5'.format(word.upper(), noise.upper())) 141 | 142 | ###################################################################### 143 | # See Noisy Speech: SNR 5 (stft) 144 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 145 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'stft', 146 | title = '"{}" with {} noise at SNR 5'.format(word.upper(), noise.upper())) 147 | 148 | ###################################################################### 149 | # Make Combined Sound Longer 150 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^ 151 | 152 | ########################################################## 153 | # Pad Speech and Set Total Length 154 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 155 | noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound( 156 | speech_sample, 157 | noise_sample, 158 | sr = sr, 159 | snr = 20, 160 | pad_mainsound_sec = 1, 161 | total_len_sec = 4) 162 | 163 | ########################################################## 164 | ipd.Audio(noisyspeech_20snr,rate=sr) 165 | 166 | ########################################################## 167 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal', 168 | title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper())) 169 | 170 | 171 | ###################################################################### 172 | # Make Combined Sound Shorter 173 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^ 174 | 175 | ########################################################## 176 | # Set Total Length 177 | # ~~~~~~~~~~~~~~~~ 178 | noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound( 179 | speech_sample, 180 | noise_sample, 181 | sr = sr, 182 | snr = 20, 183 | total_len_sec = 0.5) 184 | 185 | ########################################################## 186 | ipd.Audio(noisyspeech_20snr,rate=sr) 187 | 188 | ########################################################## 189 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal', 190 | title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper())) 191 | 192 | ###################################################################### 193 | # Wrap the Background Sound 194 | # ^^^^^^^^^^^^^^^^^^^^^^^^^ 195 | noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound( 196 | speech_sample, 197 | noise_sample, 198 | sr = sr, 199 | snr = 20, 200 | wrap = True, 201 | pad_mainsound_sec = 2, 202 | total_len_sec = 5) 203 | 204 | ########################################################## 205 | ipd.Audio(noisyspeech_20snr,rate=sr) 206 | 207 | ########################################################## 208 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal', 209 | title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper())) 210 | 211 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/examples/plot_dataset_info_formatting.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ======================================== 4 | Audio Dataset Exploration and Formatting 5 | ======================================== 6 | 7 | Examine audio files within a dataset, and reformat them if desired. 8 | 9 | To see how soundpy implements this, see `soundpy.builtin.dataset_logger` and 10 | `soundpy.builtin.dataset_formatter`. 11 | """ 12 | 13 | ##################################################################### 14 | # Let's import soundpy 15 | import soundpy as sp 16 | 17 | ############################################################################################### 18 | # 19 | # Dataset Exploration 20 | # ^^^^^^^^^^^^^^^^^^^ 21 | 22 | ########################################################## 23 | # Designate path relevant for accessing audiodata 24 | sp_dir = '../../../' 25 | 26 | ########################################################## 27 | # I will explore files in a small dataset on my computer with varying file formats. 28 | dataset_path = '{}audiodata2/'.format(sp_dir) 29 | dataset_info_dict = sp.builtin.dataset_logger('{}audiodata2/'.format(sp_dir)); 30 | 31 | ######################################################################### 32 | # This returns our data in a dictionary, perfect for exploring via Pandas 33 | import pandas as pd 34 | all_data = pd.DataFrame(dataset_info_dict).T 35 | all_data.head() 36 | 37 | ################################### 38 | # Let's have a look at the audio files and how uniform they are: 39 | print('formats: ', all_data.format_type.unique()) 40 | print('bitdepth (types): ', all_data.bitdepth.unique()) 41 | print('mean duration (sec): ', all_data.dur_sec.mean()) 42 | print('std dev duration (sec): ', all_data.dur_sec.std()) 43 | print('min sample rate: ', all_data.sr.min()) 44 | print('max sample rate: ', all_data.sr.max()) 45 | print('number of channels: ', all_data.num_channels.unique()) 46 | 47 | 48 | ########################################################## 49 | # For a visual example, let's plot the count of various sample rates. (48000 Hz is high definition sound, 16000 Hz is wideband, and 8000 Hz is narrowband, similar to how speech sounds on the telephone.) 50 | all_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts') 51 | 52 | ############################################################################################### 53 | # Reformat a Dataset 54 | # ^^^^^^^^^^^^^^^^^^ 55 | 56 | ############################################################## 57 | # Let's say we have a dataset that we want to make consistent. 58 | # We can do that with soundpy 59 | new_dataset_dir = sp.builtin.dataset_formatter( 60 | dataset_path, 61 | recursive = True, # we want all the audio, even in nested directories 62 | format='WAV', 63 | bitdepth = 16, # if set to None, a default bitdepth will be applied 64 | sr = 8000, # narrowband 65 | mono = True, # ensure data all have 1 channel 66 | dur_sec = 3, # audio will be limited to 3 seconds 67 | zeropad = True, # audio shorter than 3 seconds will be zeropadded 68 | new_dir = './example_dir/', # if None, a time-stamped directory will be created for you 69 | overwrite = False # can set to True if you want to overwrite files 70 | ); 71 | 72 | ############################################### 73 | # Let's see what the audio data looks like now: 74 | dataset_formatted_dict = sp.builtin.dataset_logger(new_dataset_dir, recursive=True); 75 | formatted_data = pd.DataFrame(dataset_formatted_dict).T 76 | 77 | ##################### 78 | formatted_data.head() 79 | 80 | ################################### 81 | print('audio formats: ', formatted_data.format_type.unique()) 82 | print('bitdepth (types): ', formatted_data.bitdepth.unique()) 83 | print('mean duration (sec): ', formatted_data.dur_sec.mean()) 84 | print('std dev duration (sec): ', formatted_data.dur_sec.std()) 85 | print('min sample rate: ', formatted_data.sr.min()) 86 | print('max sample rate: ', formatted_data.sr.max()) 87 | print('number of channels: ', formatted_data.num_channels.unique()) 88 | 89 | ########################################################## 90 | # Now all the audio data is sampled at the same rate: 8000 Hz 91 | formatted_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts') 92 | 93 | ########################################### 94 | # There we go! 95 | # You can reformat only parts of the audio files, e.g. format or bitdepth. 96 | # If you leave parameters in sp.builtin.dataset_formatter as None, the original 97 | # settings of the audio file will be maintained (except for bitdepth. 98 | # A default bitdepth will be applied according to the format of the file); see `soundfile.default_subtype`. 99 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/examples/plot_featureprep_denoiser.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ======================================================= 4 | Feature Extraction for Denoising: Clean and Noisy Audio 5 | ======================================================= 6 | 7 | Extract acoustic features from clean and noisy datasets for 8 | training a denoising model, e.g. a denoising autoencoder. 9 | 10 | To see how soundpy implements this, see `soundpy.builtin.denoiser_feats`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | 17 | ##################################################################### 18 | import soundpy as sp 19 | import IPython.display as ipd 20 | 21 | ###################################################### 22 | # Prepare for Extraction: Data Organization 23 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 24 | 25 | ###################################################### 26 | # I will use a mini denoising dataset as an example 27 | 28 | # Example noisy data: 29 | data_noisy_dir = '/home/airos/Projects/Data/denoising/uwnu/noisy' 30 | # Example clean data: 31 | data_clean_dir = '/home/airos/Projects/Data/denoising/uwnu/clean/' 32 | # Where to save extracted features: 33 | data_features_dir = './audiodata/example_feats_models/denoiser/' 34 | 35 | ###################################################### 36 | # Choose Feature Type 37 | # ~~~~~~~~~~~~~~~~~~~ 38 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'. 39 | # if you are working with speech, I suggest 'fbank', 'powspec', or 'stft'. 40 | 41 | feature_type = 'stft' 42 | sr = 22050 43 | 44 | ###################################################### 45 | # Set Duration of Audio 46 | # ~~~~~~~~~~~~~~~~~~~~~ 47 | # How much audio in seconds used from each audio file. 48 | # the speech samples are about 3 seconds long. 49 | dur_sec = 3 50 | 51 | ###################################################### 52 | # Set Context Window / Number of Frames 53 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 54 | # How many sections should each sample be broken into? (optional) 55 | # Some research papers include a 'context window' or the like, 56 | # which this refers to. 57 | frames_per_sample = 11 58 | 59 | ####################################################################### 60 | # Option 1: Built-In Functionality: soundpy does everything for you 61 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 62 | 63 | ############################################################ 64 | # Define which data to use and which features to extract. 65 | # NOTE: beacuse of the very small dataset, will set 66 | # `perc_train` to a lower level than 0.8. (Otherwise, will raise error) 67 | # Everything else is based on defaults. A feature folder with 68 | # the feature data will be created in the current working directory. 69 | # (Although, you can set this under the parameter `data_features_dir`) 70 | # `visualize` saves periodic images of the features extracted. 71 | # This is useful if you want to know what's going on during the process. 72 | perc_train = 0.6 # with larger datasets this would be around 0.8 73 | extraction_dir = sp.denoiser_feats( 74 | data_clean_dir = data_clean_dir, 75 | data_noisy_dir = data_noisy_dir, 76 | sr = sr, 77 | feature_type = feature_type, 78 | dur_sec = dur_sec, 79 | frames_per_sample = frames_per_sample, 80 | perc_train = perc_train, 81 | limit = 200, 82 | visualize=True); 83 | extraction_dir 84 | 85 | ################################################################ 86 | # The extracted features, extraction settings applied, and 87 | # which audio files were assigned to which datasets 88 | # will be saved in the `extraction_dir` directory 89 | 90 | 91 | ############################################################ 92 | # And that's it! 93 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/examples/plot_featureprep_envclassifier.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ===================================== 4 | Feature Extraction for Classification 5 | ===================================== 6 | 7 | Extract acoustic features from labeled data for 8 | training an environment or speech classifier. 9 | 10 | To see how soundpy implements this, see `soundpy.builtin.envclassifier_feats`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | 17 | 18 | ##################################################################### 19 | import os, sys 20 | import inspect 21 | currentdir = os.path.dirname(os.path.abspath( 22 | inspect.getfile(inspect.currentframe()))) 23 | parentdir = os.path.dirname(currentdir) 24 | parparentdir = os.path.dirname(parentdir) 25 | packagedir = os.path.dirname(parparentdir) 26 | sys.path.insert(0, packagedir) 27 | 28 | import matplotlib.pyplot as plt 29 | import soundpy as sp 30 | import IPython.display as ipd 31 | package_dir = '../../../' 32 | os.chdir(package_dir) 33 | sp_dir = package_dir 34 | ###################################################### 35 | # Prepare for Extraction: Data Organization 36 | # ----------------------------------------- 37 | 38 | ###################################################### 39 | # I will use a sample speech commands data set: 40 | 41 | ########################################################## 42 | # Designate path relevant for accessing audiodata 43 | data_dir = '/home/airos/Projects/Data/sound/speech_commands_small_section/' 44 | 45 | ###################################################### 46 | # Choose Feature Type 47 | # ~~~~~~~~~~~~~~~~~~~ 48 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'. 49 | # if you are working with speech, I suggest 'fbank', 'powspec', or 'stft'. 50 | 51 | feature_type = 'fbank' 52 | 53 | ###################################################### 54 | # Set Duration of Audio 55 | # ~~~~~~~~~~~~~~~~~~~~~ 56 | # How much audio in seconds used from each audio file. 57 | # The example noise and speech files are only 1 second long 58 | dur_sec = 1 59 | 60 | 61 | ############################################################# 62 | # Built-In Functionality - soundpy extracts the features for you 63 | # ---------------------------------------------------------------------------- 64 | 65 | ############################################################ 66 | # Define which data to use and which features to extract 67 | # Everything else is based on defaults. A feature folder with 68 | # the feature data will be created in the current working directory. 69 | # (Although, you can set this under the parameter `data_features_dir`) 70 | # `visualize` saves periodic images of the features extracted. 71 | # This is useful if you want to know what's going on during the process. 72 | extraction_dir = sp.envclassifier_feats(data_dir, 73 | feature_type=feature_type, 74 | dur_sec=dur_sec, 75 | visualize=True); 76 | 77 | ################################################################ 78 | # The extracted features, extraction settings applied, and 79 | # which audio files were assigned to which datasets 80 | # will be saved in the following directory: 81 | extraction_dir 82 | 83 | ############################################################ 84 | # And that's it! 85 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/examples/plot_filter_out_noise.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | """ 4 | =========================== 5 | Filter Out Background Noise 6 | =========================== 7 | 8 | Filter out background noise from noisy speech signals. 9 | 10 | To see how soundpy implements this, see `soundpy.builtin.filtersignal`. 11 | 12 | As a general note for filtering, the Wiener Filter is the default filter for soundpy. It seems to filter signals more consequently than the Band Spectral Subtraction Filter. 13 | """ 14 | 15 | 16 | ############################################################################################### 17 | # 18 | 19 | 20 | ##################################################################### 21 | 22 | # Let's import soundpy, and ipd for playing audio data 23 | import soundpy as sp 24 | import IPython.display as ipd 25 | 26 | 27 | ###################################################### 28 | # Define the noisy and clean speech audio files. 29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 30 | # Note: these files are available in the soundpy repo. 31 | # Designate path relevant for accessing audiodata 32 | sp_dir = '../../../' 33 | 34 | ########################################################## 35 | # Noise sample: 36 | noise = '{}audiodata/background_samples/traffic.wav'.format(sp_dir) 37 | noise = sp.string2pathlib(noise) 38 | speech = '{}audiodata/python.wav'.format(sp_dir) 39 | speech = sp.utils.string2pathlib(speech) 40 | 41 | ########################################################## 42 | # For filtering, we will set the sample rate to be quite high: 43 | sr = 48000 44 | 45 | ########################################################## 46 | # Create noisy speech signal as SNR 10 47 | noisy, snr_measured = sp.dsp.add_backgroundsound( 48 | speech, 49 | noise, 50 | sr = sr, 51 | snr = 10, 52 | total_len_sec = 3, 53 | pad_mainsound_sec = 0.75) 54 | 55 | ########################################################## 56 | # Hear and see the noisy speech 57 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 58 | 59 | ipd.Audio(noisy,rate=sr) 60 | 61 | ########################################################## 62 | sp.plotsound(noisy, sr=sr, feature_type='signal', 63 | title='Noisy Speech ') 64 | 65 | 66 | ########################################################## 67 | # Hear and see the clean speech 68 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 | s, sr = sp.loadsound(speech, sr=sr) 70 | ipd.Audio(s,rate=sr) 71 | 72 | ########################################################## 73 | sp.plotsound(s, sr=sr, feature_type='signal', 74 | title='Clean Speech ') 75 | 76 | 77 | ########################################################## 78 | # Filter the noisy speech 79 | # ^^^^^^^^^^^^^^^^^^^^^^^ 80 | 81 | ########################################################## 82 | # Wiener Filter 83 | # ~~~~~~~~~~~~~ 84 | 85 | ########################################################## 86 | # Let's filter with a Wiener filter: 87 | noisy_wf, sr = sp.filtersignal(noisy, 88 | sr=sr, 89 | filter_type='wiener') # default 90 | 91 | ########################################################## 92 | ipd.Audio(noisy_wf,rate=sr) 93 | 94 | ########################################################## 95 | sp.plotsound(noisy_wf, sr=sr, feature_type='signal', 96 | title='Noisy Speech: Wiener Filter') 97 | 98 | ################################################################# 99 | # Wiener Filter with Postfilter 100 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 101 | 102 | ########################################################## 103 | # Let's filter with a Wiener filter and postfilter 104 | noisy_wfpf, sr = sp.filtersignal(noisy, 105 | sr=sr, 106 | filter_type='wiener', 107 | apply_postfilter = True) 108 | 109 | ########################################################## 110 | ipd.Audio(noisy_wfpf,rate=sr) 111 | 112 | ########################################################## 113 | sp.plotsound(noisy_wfpf, sr=sr, feature_type='signal', 114 | title='Noisy Speech: Wiener Filter with Postfilter') 115 | 116 | ################################################################# 117 | # Band Spectral Subtraction 118 | # ~~~~~~~~~~~~~~~~~~~~~~~~~ 119 | 120 | ########################################################## 121 | # Let's filter using band spectral subtraction 122 | noisy_bs, sr = sp.filtersignal(noisy, 123 | sr=sr, 124 | filter_type='bandspec') 125 | 126 | ########################################################## 127 | ipd.Audio(noisy_bs,rate=sr) 128 | 129 | ########################################################## 130 | sp.plotsound(noisy_bs, sr=sr, feature_type='signal', 131 | title='Noisy Speech: Band Spectral Subtraction') 132 | 133 | 134 | ################################################################# 135 | # Band Spectral Subtraction with Postfilter 136 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 137 | 138 | ######################################################################### 139 | # Finally, let's filter using band spectral subtraction with a postfilter 140 | noisy_bspf, sr = sp.filtersignal(noisy, 141 | sr=sr, 142 | filter_type='bandspec', 143 | apply_postfilter = True) 144 | 145 | ########################################################## 146 | ipd.Audio(noisy_bspf,rate=sr) 147 | 148 | ########################################################## 149 | sp.plotsound(noisy_bspf, sr=sr, feature_type='signal', 150 | title='Noisy Speech: Band Spectral Subtraction with Postfilter') 151 | 152 | 153 | ########################################################## 154 | # Filter: increase the scale 155 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^ 156 | 157 | ########################################################## 158 | # Let's filter with a Wiener filter: 159 | filter_scale = 5 160 | noisy_wf, sr = sp.filtersignal(noisy, 161 | sr=sr, 162 | filter_type='wiener', 163 | filter_scale = filter_scale) 164 | 165 | ########################################################## 166 | # Wiener Filter 167 | # ~~~~~~~~~~~~~ 168 | 169 | ########################################################## 170 | ipd.Audio(noisy_wf,rate=sr) 171 | 172 | ########################################################## 173 | sp.plotsound(noisy_wf, sr=sr, feature_type='signal', 174 | title='Noisy Speech: Wiener Filter Scale {}'.format(filter_scale)) 175 | 176 | ################################################################# 177 | # Wiener Filter with Postfilter 178 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 179 | 180 | ########################################################## 181 | # Let's filter with a Wiener filter and postfilter 182 | noisy_wfpf, sr = sp.filtersignal(noisy, 183 | sr=sr, 184 | filter_type='wiener', 185 | apply_postfilter = True, 186 | filter_scale = filter_scale) 187 | 188 | ########################################################## 189 | ipd.Audio(noisy_wfpf,rate=sr) 190 | 191 | ########################################################## 192 | sp.plotsound(noisy_wfpf, sr=sr, feature_type='signal', 193 | title='Noisy Speech: Wiener Filter with Postfilter Scale {}'.format(filter_scale)) 194 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/examples/plot_implement_denoiser.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ================================= 4 | Implement a Denoising Autoencoder 5 | ================================= 6 | 7 | Implement denoising autoencoder to denoise a noisy speech signal. 8 | 9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_run`. 10 | """ 11 | 12 | 13 | ############################################################################################ 14 | # 15 | 16 | ##################################################################### 17 | # Let's import soundpy and other packages 18 | import soundpy as sp 19 | import numpy as np 20 | # for playing audio in this notebook: 21 | import IPython.display as ipd 22 | 23 | ##################################################################### 24 | # As well as the deep learning component of soundpy 25 | from soundpy import models as spdl 26 | 27 | ###################################################### 28 | # Prepare for Implementation: Data Organization 29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 30 | 31 | ########################################################## 32 | # Set path relevant for audio data for this example 33 | sp_dir = '../../../' 34 | 35 | ###################################################### 36 | # Set model pathway 37 | # ~~~~~~~~~~~~~~~~~ 38 | # Currently, this expects a model saved with weights, with a .h5 extension. 39 | # (See `model` below) 40 | 41 | ###################################################### 42 | # The soundpy repo offers a pre-trained denoiser, which we'll use. 43 | model = '{}audiodata/models/'.format(sp_dir)+\ 44 | 'denoiser/example_denoiser_stft.h5' 45 | # ensure is a pathlib.PosixPath object 46 | print(model) 47 | model = sp.utils.string2pathlib(model) 48 | model_dir = model.parent 49 | 50 | ######################################################### 51 | # What is in this folder? 52 | files = list(model_dir.glob('*.*')) 53 | for f in files: 54 | print(f.name) 55 | 56 | ###################################################### 57 | # Provide dictionary with feature extraction settings 58 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 59 | 60 | ######################################################### 61 | # If soundpy extracts features for you, a 'log_extraction_settings.csv' 62 | # file will be saved, which includes relevant feature settings for implementing 63 | # the model; see `soundpy.feats.save_features_datasets` 64 | feat_settings = sp.utils.load_dict( 65 | model_dir.joinpath('log_extraction_settings.csv')) 66 | for key, value in feat_settings.items(): 67 | print(key, ' --> ', value) 68 | # change objects that were string to original format 69 | import ast 70 | try: 71 | feat_settings[key] = ast.literal_eval(value) 72 | except ValueError: 73 | pass 74 | except SyntaxError: 75 | pass 76 | 77 | ######################################################### 78 | # For the purposes of plotting, let's use some of the settings defined: 79 | feature_type = feat_settings['feature_type'] 80 | sr = feat_settings['sr'] 81 | 82 | ###################################################### 83 | # Provide new audio for the denoiser to denoise! 84 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 85 | 86 | ######################################################### 87 | # We'll use sample speech from the soundpy repo: 88 | speech = sp.string2pathlib('{}audiodata/python.wav'.format(sp_dir)) 89 | s, sr = sp.loadsound(speech, sr=sr) 90 | 91 | ######################################################### 92 | # Let's add some white noise (10 SNR) 93 | s_n = sp.augment.add_white_noise(s, sr=sr, snr=10) 94 | 95 | ############################################################## 96 | # What does the noisy audio sound like? 97 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 98 | ipd.Audio(s_n,rate=sr) 99 | 100 | ############################################################## 101 | # What does the noisy audio look like? 102 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 103 | sp.plotsound(s_n, sr = sr, feature_type='signal') 104 | 105 | ############################################################## 106 | # What does the clean audio sound like? 107 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 108 | ipd.Audio(s,rate=sr) 109 | 110 | ############################################################## 111 | # What does the clean audio look like? 112 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 113 | sp.plotsound(s, sr = sr, feature_type='signal') 114 | 115 | ######################################################################### 116 | # Built-In Denoiser Functionality 117 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 118 | 119 | ############################################################## 120 | # We just need to feed the model path, the noisy sample path, and 121 | # the feature settings dictionary we looked at above. 122 | y, sr = spdl.denoiser_run(model, s_n, feat_settings) 123 | 124 | ########################################################## 125 | # How does the output sound? 126 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~ 127 | ipd.Audio(y,rate=sr) 128 | 129 | ########################################################## 130 | # How does is the output look? 131 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 132 | sp.plotsound(y, sr=sr, feature_type = 'signal') 133 | 134 | ########################################################## 135 | # How do the features compare? 136 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 137 | 138 | ########################################################## 139 | # STFT features of the noisy input speech: 140 | sp.plotsound(s_n, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db', 141 | title = 'Noisy input: STFT features') 142 | 143 | ########################################################## 144 | # STFT features of the output 145 | sp.plotsound(y, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db', 146 | title = 'Denoiser Output: STFT features') 147 | 148 | ########################################################## 149 | # STFT features of the clean version of the audio: 150 | sp.plotsound(s, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db', 151 | title = 'Clean "target" audio: STFT features') 152 | 153 | 154 | ########################################################## 155 | # It's not perfect but for a pretty simple implementation, the noise is gone 156 | # and you can hear the person speaking. Pretty cool! 157 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/examples/plot_signals_and_features.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ======================= 4 | Create and Plot Signals 5 | ======================= 6 | 7 | Create and plot signals / noise; combine them at a specific SNR. 8 | 9 | To see how soundpy implements this, see `soundpy.dsp.generate_sound`, 10 | `soundpy.dsp.generate_noise` and `soundpy.dsp.add_backgroundsound`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | 17 | ##################################################################### 18 | # Let's import soundpy 19 | import soundpy as sp 20 | 21 | ########################################################################### 22 | # Create a Signal 23 | # ^^^^^^^^^^^^^^^ 24 | 25 | ######################################################################## 26 | # First let's set what sample rate we want to use 27 | sr = 44100 28 | 29 | 30 | ######################################################################### 31 | # Let's create a signal of 10 Hz 32 | sig1_hz = 10 33 | sig1, sr = sp.generate_sound(freq=sig1_hz, amplitude = 0.4, sr=sr, dur_sec=1) 34 | sp.plotsound(sig1, sr=sr, feature_type = 'signal', 35 | title = 'Signal: {} Hz'.format(sig1_hz)) 36 | 37 | 38 | ######################################################################### 39 | # Let's create a signal of 20 Hz 40 | sig2_hz = 20 41 | sig2, sr = sp.generate_sound(freq=sig2_hz, amplitude= 0.4, sr=sr, dur_sec=1) 42 | sp.plotsound(sig2, sr=sr, feature_type = 'signal', 43 | title = 'Signal: {} Hz'.format(sig2_hz)) 44 | 45 | ########################################################################### 46 | # Combine Signals 47 | # ^^^^^^^^^^^^^^^ 48 | 49 | 50 | ######################################################################### 51 | # Add them together and see what they look like: 52 | sig3 = sig1 + sig2 53 | sp.plotsound(sig3, sr=sr, feature_type = 'signal', 54 | title='Mixed Signals: {} Hz + {} Hz'.format(sig1_hz, sig2_hz)) 55 | 56 | 57 | ########################################################################## 58 | # Generate Pseudo-Random Noise 59 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 60 | 61 | 62 | ######################################################################### 63 | # Create noise to add to the signal: 64 | noise = sp.generate_noise(len(sig3), amplitude=0.025, random_seed=40) 65 | sp.plotsound(noise, sr=sr, feature_type = 'signal', 66 | title='Random Noise') 67 | 68 | ########################################################################### 69 | # Control SNR: Adding a Background Sound 70 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 71 | 72 | ######################################################################### 73 | # Add noise at signal-to-noise ratio of 40 74 | sig_noisy, snr = sp.dsp.add_backgroundsound( 75 | audio_main = sig3, 76 | audio_background = noise, 77 | sr = sr, 78 | snr = 40, 79 | clip_at_zero = False) 80 | 81 | # keep energy between 1 and -1 82 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 83 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 40 SNR') 84 | 85 | ######################################################################### 86 | # Add noise at signal-to-noise ratio of 20 87 | sig_noisy, snr = sp.dsp.add_backgroundsound( 88 | audio_main = sig3, 89 | audio_background = noise, 90 | sr = sr, 91 | snr = 20) 92 | # keep energy between 1 and -1 93 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 94 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 20 SNR') 95 | 96 | ######################################################################### 97 | # Add noise at signal-to-noise ratio of 10 98 | sig_noisy, snr = sp.dsp.add_backgroundsound( 99 | audio_main = sig3, 100 | audio_background = noise, 101 | sr = sr, 102 | snr = 10) 103 | # keep energy between 1 and -1 104 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 105 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 10 SNR') 106 | 107 | ######################################################################### 108 | # Add noise at signal-to-noise ratio of 0 109 | sig_noisy, snr = sp.dsp.add_backgroundsound( 110 | audio_main = sig3, 111 | audio_background = noise, 112 | sr = sr, 113 | snr = 0) 114 | # keep energy between 1 and -1 115 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 116 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 0 SNR') 117 | 118 | 119 | ######################################################################### 120 | # Add noise at signal-to-noise ratio of -10 121 | sig_noisy, snr = sp.dsp.add_backgroundsound( 122 | audio_main = sig3, 123 | audio_background = noise, 124 | sr = sr, 125 | snr = -10) 126 | # keep energy between 1 and -1 127 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 128 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: -10 SNR') 129 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/examples/plot_train_classifier.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ============================ 4 | Train an Acoustic Classifier 5 | ============================ 6 | 7 | Train an acoustic classifier on speech or noise features. 8 | 9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_train`. 10 | """ 11 | 12 | ############################################################################################### 13 | # 14 | 15 | ##################################################################### 16 | # Let's import soundpy for handling sound 17 | import soundpy as sp 18 | ##################################################################### 19 | # As well as the deep learning component of soundpy 20 | from soundpy import models as spdl 21 | 22 | 23 | ###################################################### 24 | # Prepare for Training: Data Organization 25 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 26 | 27 | ########################################################## 28 | # Set path relevant for audio data for this example 29 | sp_dir = '../../../' 30 | 31 | ###################################################### 32 | # I will load previously extracted features (from the Speech Commands Dataset) 33 | # See `soundpy.feats.save_features_datasets` or `soundpy.builtin.envclassifier_feats` 34 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\ 35 | 'envclassifier/example_feats_fbank/' 36 | 37 | ######################################################### 38 | # What is in this folder? 39 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir) 40 | files = list(feature_extraction_dir.glob('*.*')) 41 | for f in files: 42 | print(f.name) 43 | 44 | ######################################################### 45 | # The .npy files contain the features themselves, in train, validation, and 46 | # test datasets: 47 | files = list(feature_extraction_dir.glob('*.npy')) 48 | for f in files: 49 | print(f.name) 50 | 51 | ######################################################### 52 | # The .csv files contain information about how the features were extracted 53 | files = list(feature_extraction_dir.glob('*.csv')) 54 | for f in files: 55 | print(f.name) 56 | 57 | ######################################################### 58 | # We'll have a look at which features were extracted and other settings: 59 | feat_settings = sp.utils.load_dict( 60 | feature_extraction_dir.joinpath('log_extraction_settings.csv')) 61 | for key, value in feat_settings.items(): 62 | print(key, ' --> ', value) 63 | 64 | ######################################################### 65 | # For more about these settings, see `soundpy.feats.save_features_datasets`. 66 | 67 | ######################################################### 68 | # We'll have a look at the audio files that were assigned 69 | # to the train, val, and test datasets. 70 | audio_datasets = sp.utils.load_dict( 71 | feature_extraction_dir.joinpath('dataset_audiofiles.csv')) 72 | count = 0 73 | for key, value in audio_datasets.items(): 74 | print(key, ' --> ', value) 75 | count += 1 76 | if count > 5: 77 | break 78 | 79 | ############################################################# 80 | # Built-In Functionality: soundpy does everything for you 81 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 82 | # For more about this function, see `soundpy.models.builtin.envclassifier_train`. 83 | 84 | ############################################################# 85 | model_dir, history = spdl.envclassifier_train( 86 | feature_extraction_dir = feature_extraction_dir, 87 | epochs = 50, 88 | patience = 30) 89 | 90 | ############################################################# 91 | # Where the model and logs are located: 92 | model_dir 93 | 94 | ############################################################# 95 | # Let's plot how the model performed (on this mini dataset) 96 | import matplotlib.pyplot as plt 97 | plt.clf() 98 | plt.plot(history.history['accuracy']) 99 | plt.plot(history.history['val_accuracy']) 100 | plt.title('model accuracy') 101 | plt.ylabel('accuracy') 102 | plt.xlabel('epoch') 103 | plt.legend(['train', 'val'], loc='upper right') 104 | plt.show() 105 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/examples/plot_train_denoiser.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ============================= 4 | Train a Denoising Autoencoder 5 | ============================= 6 | 7 | Train a denoising autoencoder with clean and noisy acoustic features. 8 | 9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_train`, 10 | `soundpy.builtin.denoiser_feats` and `soundpy.builtin.create_denoise_data`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | 17 | 18 | ##################################################################### 19 | # Let's import soundpy for handling sound 20 | import soundpy as sp 21 | ##################################################################### 22 | # As well as the deep learning component of soundpy 23 | from soundpy import models as spdl 24 | 25 | 26 | ###################################################### 27 | # Prepare for Training: Data Organization 28 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 29 | 30 | ########################################################## 31 | # Designate path relevant for accessing audiodata 32 | sp_dir = '../../../' 33 | 34 | 35 | ###################################################### 36 | # I will load previously extracted features (sample data), see `soundpy.feats.save_features_datasets` or `soundpy.builtin.denoiser_feats` 37 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\ 38 | 'denoiser/example_feats_fbank/' 39 | 40 | ######################################################### 41 | # What is in this folder? 42 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir) 43 | files = list(feature_extraction_dir.glob('*.*')) 44 | for f in files: 45 | print(f.name) 46 | 47 | ######################################################### 48 | # The .npy files contain the features themselves, in train, validation, and 49 | # test datasets: 50 | files = list(feature_extraction_dir.glob('*.npy')) 51 | for f in files: 52 | print(f.name) 53 | 54 | ######################################################### 55 | # The .csv files contain information about how the features were extracted 56 | files = list(feature_extraction_dir.glob('*.csv')) 57 | for f in files: 58 | print(f.name) 59 | 60 | ######################################################### 61 | # We'll have a look at which features were extracted and other settings: 62 | feat_settings = sp.utils.load_dict( 63 | feature_extraction_dir.joinpath('log_extraction_settings.csv')) 64 | for key, value in feat_settings.items(): 65 | print(key, ' ---> ', value) 66 | 67 | ######################################################### 68 | # For more about these settings, see `soundpy.feats.save_features_datasets`. 69 | 70 | ######################################################### 71 | # We'll have a look at the audio files that were assigned 72 | # to the train, val, and test datasets. 73 | audio_datasets = sp.utils.load_dict( 74 | feature_extraction_dir.joinpath('audiofiles_datasets_clean.csv')) 75 | count = 0 76 | for key, value in audio_datasets.items(): 77 | print(key, ' ---> ', value) 78 | count += 1 79 | if count > 5: 80 | break 81 | 82 | ############################################################# 83 | # Built-In Functionality: soundpy does everything for you 84 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 85 | # For more about this, see `soundpy.builtin.denoiser_train`. 86 | 87 | ############################################################# 88 | model_dir, history = spdl.denoiser_train( 89 | feature_extraction_dir = feature_extraction_dir, 90 | epochs = 50) 91 | 92 | ######################################################### 93 | 94 | 95 | ############################################################# 96 | # Where the model and logs are located: 97 | model_dir 98 | 99 | 100 | ############################################################# 101 | # Let's plot how the model performed (on this mini dataset) 102 | 103 | import matplotlib.pyplot as plt 104 | plt.plot(history.history['loss']) 105 | plt.plot(history.history['val_loss']) 106 | plt.title('model loss') 107 | plt.ylabel('loss') 108 | plt.xlabel('epoch') 109 | plt.legend(['train', 'val'], loc='upper right') 110 | plt.show() 111 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/exceptions.rst: -------------------------------------------------------------------------------- 1 | 2 | Customized Errors 3 | ----------------- 4 | 5 | .. automodule:: soundpy.exceptions 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/feats.rst: -------------------------------------------------------------------------------- 1 | 2 | Extract and manipulate audio features 3 | ------------------------------------- 4 | 5 | .. automodule:: soundpy.feats 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/files.rst: -------------------------------------------------------------------------------- 1 | 2 | Working with audio files 3 | ------------------------ 4 | 5 | .. automodule:: soundpy.files 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/filters.rst: -------------------------------------------------------------------------------- 1 | 2 | Filters: Wiener and Band Spectral Subtraction 3 | --------------------------------------------- 4 | 5 | .. automodule:: soundpy.filters 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | .. autoclass:: soundpy.filters.FilterSettings 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | .. automethod:: __init__ 16 | 17 | .. autoclass:: soundpy.filters.Filter 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | .. automethod:: __init__ 23 | 24 | 25 | .. autoclass:: soundpy.filters.WienerFilter 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | .. automethod:: __init__ 31 | 32 | 33 | .. autoclass:: soundpy.filters.BandSubtraction 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | .. automethod:: __init__ 39 | 40 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/index.rst: -------------------------------------------------------------------------------- 1 | .. SoundPy documentation master file, created by 2 | sphinx-quickstart on Mon Jun 15 11:57:18 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | SoundPy v0.1.0a2 7 | ==================== 8 | 9 | Welcome to the docs! 10 | -------------------- 11 | 12 | SoundPy is a research based Python package_ for exploring and experimenting with sound and deep learning. 13 | 14 | Those who might find this useful: 15 | 16 | * speech and sound enthusiasts 17 | * digital signal processing / mathematics / physics / acoustics enthusiasts 18 | * deep learning enthusiasts 19 | * researchers 20 | * linguists 21 | * psycholinguists 22 | 23 | The main goal of SoundPy is to provide the code and functionality with more context via visualization, research, and mathematics. Most of the resources used to build the functionality stems from publicly available research and datasets. 24 | 25 | As it covers quite a large range, from audio file conversion to implementation of trained neural networks, the purpose of SoundPy is not to be the perfect implementation of all functions (although that is also a goal :P ), but rather a peak into how they *can* be implemented, hopefully offering people a foundation for trying out different ways of implementation (feature extraction, building neural networks, etc.). 26 | 27 | This project is still in the beginning stages and has a lot of room for growth, especially with contributors having a background / knowlege in data science, computer science, machine and deep learning, physics, acoustics, or dsp. Contributors from other backgrounds are also welcome! If you'd like SoundPy to do something it doesn't, try making it or create an issue. 28 | 29 | .. _package: https://github.com/a-n-rose/Python-Sound-Tool/tree/master 30 | 31 | .. toctree:: 32 | :maxdepth: 2 33 | 34 | example_cases.rst 35 | readme.rst 36 | 37 | 38 | .. toctree:: 39 | :maxdepth: 1 40 | 41 | changelog.rst 42 | 43 | * :ref:`genindex` 44 | * :ref:`modindex` 45 | * :ref:`search` 46 | 47 | :Author: 48 | Aislyn Rose 49 | 50 | rose.aislyn.noelle@gmail.com 51 | 52 | webpage_ 53 | 54 | github_ 55 | 56 | .. _webpage: https://a-n-rose.github.io/ 57 | 58 | .. _github : https://github.com/a-n-rose 59 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/model_dataprep.rst: -------------------------------------------------------------------------------- 1 | 2 | Feeding large datasets to models 3 | -------------------------------- 4 | 5 | .. autoclass:: soundpy.models.dataprep.Generator 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | .. automethod:: __init__ 11 | 12 | 13 | .. automodule:: soundpy.models.dataprep 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/modelsetup.rst: -------------------------------------------------------------------------------- 1 | 2 | Additional model setup (e.g. Early Stopping) 3 | -------------------------------------------- 4 | 5 | .. automodule:: soundpy.models.modelsetup 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/modules.rst: -------------------------------------------------------------------------------- 1 | ========================= 2 | SoundPy Functionality 3 | ========================= 4 | 5 | .. include:: builtin_sp.rst 6 | 7 | .. include:: builtin_spdl.rst 8 | 9 | .. include:: augment.rst 10 | 11 | .. include:: files.rst 12 | 13 | .. include:: datasets.rst 14 | 15 | .. include:: dsp.rst 16 | 17 | .. include:: filters.rst 18 | 19 | .. include:: feats.rst 20 | 21 | .. include:: template_models.rst 22 | 23 | .. include:: modelsetup.rst 24 | 25 | .. include:: model_dataprep.rst 26 | 27 | .. include:: utils.rst 28 | 29 | .. include:: exceptions.rst 30 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: modules.rst 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/template_models.rst: -------------------------------------------------------------------------------- 1 | Template deep neural networks 2 | ----------------------------- 3 | 4 | .. automodule:: soundpy.models.template_models 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/0.1.0a2/utils.rst: -------------------------------------------------------------------------------- 1 | 2 | Other useful non-specific functionality 3 | --------------------------------------- 4 | 5 | .. automodule:: soundpy.utils 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/augment.rst: -------------------------------------------------------------------------------- 1 | 2 | Augment audio data 3 | ------------------ 4 | 5 | .. automodule:: soundpy.augment 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/builtin_sp.rst: -------------------------------------------------------------------------------- 1 | 2 | Built-In Functionality (non Deep Learning) 3 | ------------------------------------------ 4 | 5 | .. automodule:: soundpy.builtin 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/builtin_spdl.rst: -------------------------------------------------------------------------------- 1 | 2 | Built-In Functionality (Deep Learning) 3 | -------------------------------------- 4 | 5 | .. automodule:: soundpy.models.builtin 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/changelog.rst: -------------------------------------------------------------------------------- 1 | ********* 2 | Changelog 3 | ********* 4 | 5 | v0.1.0a 6 | ======= 7 | 8 | 9 | v0.1.0a3 10 | -------- 11 | 2021-04-09 12 | 13 | Bug fixes 14 | - no longer use Librosa for feature extraction: allow easier implementation of augmentations, especially during training. 15 | - `soundpy.feats.plot` now uses parameter `subprocess` to allow for different backends to be applied, depending on when funciton is called. For example, if plotting from within a Generator while training, `subprocess` should be set to True, and the 'Agg' backend will be applied. Otherwise, 'TkAgg' backend is used. Fixes issues with multi-threading. 16 | - Fixed generator and Tensorflow issue: with Tensorflow 2.2.0+ the models in `soundpy.models.builtin` that were trained via generator failed. Use `tensorflow.data.Dataset.from_generator` to feed generator data to models. 17 | - Improved `clip_at_zero`. 18 | 19 | Features 20 | - Python 3.8 can now be used. 21 | - throw depreciation warning for parameters `context_window` or `frames_per_sample` as these "features" will be removed from feature extraction. Rather the features can be reshaped post feature extraction. 22 | - added `timestep`, `axis_timestep`, `context_window`, `axis_context_window` and `combine_axes_0_1` paremeters to `soundpy.models.Generator`: allow more control over shape of the features. 23 | - can run `soundpy.models.builtin.envclassifier_extract_train` to run with pre-extracted val and test features. 24 | - `soundpy.feats.plotsound`, `soundpy.feats.plot_vad` and `soundpy.feats.plot_dom_freq` all can plot stereo sound: for each channel in a stereo signal, a plot is either generated or saved. If a filename already exists, a date stamp is added to filename to avoid overwriting images. 25 | - allow `grayscale2color` to be applied to 2D data. 26 | 27 | Breaking changes 28 | - `soundpy.models.Generator` uses parameter `normalize` instaed of `normalized`. Found this to be more intuitive. If `normalize` is set to True, data will be normalized. Before, if `normalized` was set to True, data would not be normalized. 29 | - removed `add_tensor_last` and `add_tensor_first`: require adding of tensors (for keras) to be included in parameter `desired_input_shape`. 30 | 31 | Other changes 32 | - CPU soundpy can use Tensorflow 2.1.0, 2.2.0 and 2.3.0. Dockerfile still uses Tensorflow 2.1.0 as it is still compatible with updated code. 33 | - `soundpy.models.builtin.implement_denoiser` raises warning if cleaned features cannot be converted to raw audio samples. 34 | 35 | 36 | v0.1.0a2 37 | -------- 38 | 2020-08-13 39 | 40 | 41 | Bug fixes 42 | - added `use_beg_ms` parameter in `soundpy.dsp.vad`: improved VAD recognition of silences post speech. 43 | 44 | Features 45 | - added GPU option: provide instructions and Docker image for running SoundPy with GPU 46 | - added `extend_window_ms` paremeter to `soundpy.feats.get_vad_samples` and `soundpy.feats.get_vad_stft`: can extend VAD window if desired. Useful in higher SNR environments. 47 | - added `soundpy.feats.get_samples_clipped` and `soundpy.feats.get_stft_clipped` to clip off beginning and ending silences. 48 | - added `beg_end_clipped` parameter to `soundpy.feats.plot_vad` to visualize VAD by clipping the beginning and ending silences (if True) or VAD instances throughout the signal (if False). 49 | - added `soundpy.models.dataprep.GeneratorFeatExtraction` class for extracting and augmenting features during training (still experimental). 50 | - added `soundpy.models.builtin.envclassifier_extract_train` as an example of extracting and augmenting features during training (still experimental). 51 | - added `soundpy.dsp.clip_at_zero` to enable smoother concatenations of signals and enables removal of clicks at beginning and ending of signals. 52 | - added `soundpy.dsp.remove_dc_bias` to enable smoother concatenations of signals 53 | - added and set `remove_dc` parameter to True in `soundpy.files.loadsound` and `soundpy.files.savesound` to ensure signals all have mean zero. 54 | - added `mirror_sound` option to `soundpy.dsp.apply_sample_length` as a way to extend sound. 55 | - added `soundpy.dsp.ismono` to check if samples were mono or stereo. 56 | - added `soundpy.dsp.average_channels` to average sample amplitudes across channels, e.g. to identify where high energy begins / ends in the signal without disregarding additional channels (if stereo sound). 57 | - added `soundpy.dsp.add_channels` for adding additional channels if needed (e.g. for applying a 'hann' or 'hamming' window to stereo sound) 58 | - added stereo sound functionality to `soundpy.dsp.add_backgroundsound`, `soundpy.dsp.clip_at_zero`, `soundpy.dsp.calc_fft`, `soundpy.feats.get_stft`, `soundpy.feats.get_vad_stft` 59 | 60 | 61 | Other changes 62 | - name change: from pysoundtool to soundpy: simpler 63 | - updated dependencies to newest versions still compatible with Tensorflow 2.1.0 64 | - moved `soundpy.dsp.get_vad_samples` to `soundpy.feats.get_vad_samples` 65 | - moved `soundpy.dsp.get_vad_stft` to `soundpy.feats.get_vad_stft` 66 | - name change: allow `soundpy.feats.normalize` to be used as `soundpy.normalize` 67 | - removed `pysoundtool_online` and mybinder button as maintaining the online version was not easily done. Aim to reimplement at some point. 68 | 69 | 70 | 71 | v0.1.0a1 72 | ======== 73 | 74 | Initial public alpha release. 75 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/datasets.rst: -------------------------------------------------------------------------------- 1 | 2 | Organizing datasets 3 | ------------------- 4 | 5 | .. automodule:: soundpy.datasets 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/dsp.rst: -------------------------------------------------------------------------------- 1 | 2 | Working with signals 3 | -------------------- 4 | 5 | .. automodule:: soundpy.dsp 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/example_cases.rst: -------------------------------------------------------------------------------- 1 | 2 | .. toctree:: 3 | :maxdepth: 2 4 | 5 | .. include:: auto_examples/index.rst 6 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/examples/README.txt: -------------------------------------------------------------------------------- 1 | 2 | ----------------------------- 3 | SoundPy Example Use Cases 4 | ----------------------------- 5 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/examples/plot_dataset_info_formatting.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ======================================== 4 | Audio Dataset Exploration and Formatting 5 | ======================================== 6 | 7 | Examine audio files within a dataset, and reformat them if desired. 8 | 9 | To see how soundpy implements this, see `soundpy.builtin.dataset_logger` and 10 | `soundpy.builtin.dataset_formatter`. 11 | """ 12 | 13 | ##################################################################### 14 | # Let's import soundpy 15 | import soundpy as sp 16 | 17 | ############################################################################################### 18 | # 19 | # Dataset Exploration 20 | # ^^^^^^^^^^^^^^^^^^^ 21 | 22 | ########################################################## 23 | # Designate path relevant for accessing audiodata 24 | sp_dir = '../../../' 25 | 26 | ########################################################## 27 | # I will explore files in a small dataset on my computer with varying file formats. 28 | dataset_path = '{}audiodata2/'.format(sp_dir) 29 | dataset_info_dict = sp.builtin.dataset_logger('{}audiodata2/'.format(sp_dir)); 30 | 31 | ######################################################################### 32 | # This returns our data in a dictionary, perfect for exploring via Pandas 33 | import pandas as pd 34 | all_data = pd.DataFrame(dataset_info_dict).T 35 | all_data.head() 36 | 37 | ################################### 38 | # Let's have a look at the audio files and how uniform they are: 39 | print('formats: ', all_data.format_type.unique()) 40 | print('bitdepth (types): ', all_data.bitdepth.unique()) 41 | print('mean duration (sec): ', all_data.dur_sec.mean()) 42 | print('std dev duration (sec): ', all_data.dur_sec.std()) 43 | print('min sample rate: ', all_data.sr.min()) 44 | print('max sample rate: ', all_data.sr.max()) 45 | print('number of channels: ', all_data.num_channels.unique()) 46 | 47 | 48 | ########################################################## 49 | # For a visual example, let's plot the count of various sample rates. (48000 Hz is high definition sound, 16000 Hz is wideband, and 8000 Hz is narrowband, similar to how speech sounds on the telephone.) 50 | all_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts') 51 | 52 | ############################################################################################### 53 | # Reformat a Dataset 54 | # ^^^^^^^^^^^^^^^^^^ 55 | 56 | ############################################################## 57 | # Let's say we have a dataset that we want to make consistent. 58 | # We can do that with soundpy 59 | new_dataset_dir = sp.builtin.dataset_formatter( 60 | dataset_path, 61 | recursive = True, # we want all the audio, even in nested directories 62 | format='WAV', 63 | bitdepth = 16, # if set to None, a default bitdepth will be applied 64 | sr = 16000, # wideband 65 | mono = True, # ensure data all have 1 channel 66 | dur_sec = 3, # audio will be limited to 3 seconds 67 | zeropad = True, # audio shorter than 3 seconds will be zeropadded 68 | new_dir = './example_dir/', # if None, a time-stamped directory will be created for you 69 | overwrite = False # can set to True if you want to overwrite files 70 | ); 71 | 72 | ############################################### 73 | # Let's see what the audio data looks like now: 74 | dataset_formatted_dict = sp.builtin.dataset_logger(new_dataset_dir, recursive=True); 75 | formatted_data = pd.DataFrame(dataset_formatted_dict).T 76 | 77 | ##################### 78 | formatted_data.head() 79 | 80 | ################################### 81 | print('audio formats: ', formatted_data.format_type.unique()) 82 | print('bitdepth (types): ', formatted_data.bitdepth.unique()) 83 | print('mean duration (sec): ', formatted_data.dur_sec.mean()) 84 | print('std dev duration (sec): ', formatted_data.dur_sec.std()) 85 | print('min sample rate: ', formatted_data.sr.min()) 86 | print('max sample rate: ', formatted_data.sr.max()) 87 | print('number of channels: ', formatted_data.num_channels.unique()) 88 | 89 | ########################################################## 90 | # Now all the audio data is sampled at the same rate: 8000 Hz 91 | formatted_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts') 92 | 93 | ########################################### 94 | # There we go! 95 | # You can reformat only parts of the audio files, e.g. format or bitdepth. 96 | # If you leave parameters in sp.builtin.dataset_formatter as None, the original 97 | # settings of the audio file will be maintained (except for bitdepth. 98 | # A default bitdepth will be applied according to the format of the file); see `soundfile.default_subtype`. 99 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/examples/plot_extract_augment_train_classifier.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ================================================== 4 | Extract, Augment, and Train an Acoustic Classifier 5 | ================================================== 6 | 7 | Extract and augment features as an acoustic classifier is trained on speech. 8 | 9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_extract_train`. 10 | """ 11 | 12 | ############################################################################################### 13 | # 14 | 15 | import os, sys 16 | import inspect 17 | currentdir = os.path.dirname(os.path.abspath( 18 | inspect.getfile(inspect.currentframe()))) 19 | parentdir = os.path.dirname(currentdir) 20 | parparentdir = os.path.dirname(parentdir) 21 | packagedir = os.path.dirname(parparentdir) 22 | sys.path.insert(0, packagedir) 23 | 24 | import matplotlib.pyplot as plt 25 | import IPython.display as ipd 26 | package_dir = '../../../' 27 | os.chdir(package_dir) 28 | sp_dir = package_dir 29 | 30 | 31 | ##################################################################### 32 | # Let's import soundpy for handling sound 33 | import soundpy as sp 34 | ##################################################################### 35 | # As well as the deep learning component of soundpy 36 | from soundpy import models as spdl 37 | 38 | 39 | ###################################################### 40 | # Prepare for Training: Data Organization 41 | # ======================================= 42 | 43 | ###################################################### 44 | # I will use a sample speech commands data set: 45 | 46 | ########################################################## 47 | # Designate path relevant for accessing audiodata 48 | data_dir = '{}../mini-audio-datasets/speech_commands/'.format(sp_dir) 49 | 50 | 51 | ###################################################### 52 | # Setup a Feature Settings Dictionary 53 | # ----------------------------------- 54 | 55 | 56 | feature_type = 'fbank' 57 | num_filters = 40 58 | rate_of_change = False 59 | rate_of_acceleration = False 60 | dur_sec = 1 61 | win_size_ms = 25 62 | percent_overlap = 0.5 63 | sr = 22050 64 | fft_bins = None 65 | num_mfcc = None 66 | real_signal = True 67 | 68 | get_feats_kwargs = dict(feature_type = feature_type, 69 | sr = sr, 70 | dur_sec = dur_sec, 71 | win_size_ms = win_size_ms, 72 | percent_overlap = percent_overlap, 73 | fft_bins = fft_bins, 74 | num_filters = num_filters, 75 | num_mfcc = num_mfcc, 76 | rate_of_change = rate_of_change, 77 | rate_of_acceleration = rate_of_acceleration, 78 | real_signal = real_signal) 79 | 80 | ###################################################### 81 | # Setup an Augmentation Dictionary 82 | # -------------------------------- 83 | # This will apply augmentations at random at each epoch. 84 | augmentation_all = dict([('add_white_noise',True), 85 | ('speed_decrease', True), 86 | ('speed_increase', True), 87 | ('pitch_decrease', True), 88 | ('pitch_increase', True), 89 | ('harmonic_distortion', True), 90 | ('vtlp', True) 91 | ]) 92 | 93 | ########################################################## 94 | # see the default values for these augmentations 95 | augment_settings_dict = {} 96 | for key in augmentation_all.keys(): 97 | augment_settings_dict[key] = sp.augment.get_augmentation_settings_dict(key) 98 | for key, value in augment_settings_dict.items(): 99 | print(key, ' : ', value) 100 | 101 | ########################################################## 102 | # Adjust Augmentation Defaults 103 | # ---------------------------- 104 | 105 | 106 | ########################################################## 107 | # Adjust Add White Noise 108 | # ~~~~~~~~~~~~~~~~~~~~~~ 109 | # I want the SNR of the white noise to vary between several: 110 | # SNR 10, 15, and 20. 111 | augment_settings_dict['add_white_noise']['snr'] = [10,15,20] 112 | 113 | ########################################################## 114 | # Adjust Pitch Decrease 115 | # ~~~~~~~~~~~~~~~~~~~~~ 116 | # I found the pitch changes too exaggerated, so I will 117 | # set those to 1 instead of 2 semitones. 118 | augment_settings_dict['pitch_decrease']['num_semitones'] = 1 119 | 120 | ########################################################## 121 | # Adjust Pitch Increase 122 | # ~~~~~~~~~~~~~~~~~~~~~ 123 | augment_settings_dict['pitch_increase']['num_semitones'] = 1 124 | 125 | ########################################################## 126 | # Adjust Speed Decrease 127 | # ~~~~~~~~~~~~~~~~~~~~~ 128 | augment_settings_dict['speed_decrease']['perc'] = 0.1 129 | 130 | ########################################################## 131 | # Adjust Speed Increase 132 | # ~~~~~~~~~~~~~~~~~~~~~ 133 | augment_settings_dict['speed_increase']['perc'] = 0.1 134 | 135 | 136 | ###################################################### 137 | # Update an Augmentation Dictionary 138 | # --------------------------------- 139 | # We'll include in the dictionary the settings we want for augmentations: 140 | augmentation_all.update( 141 | dict(augment_settings_dict = augment_settings_dict)) 142 | 143 | 144 | ###################################################### 145 | # Train the Model 146 | # =============== 147 | # Note: disregard the warning: 148 | # WARNING: Only the power spectrum of the VTLP augmented signal can be returned due to resizing the augmentation from (56, 4401) to (79, 276) 149 | # 150 | # This is due to the hyper frequency resolution applied to the audio during 151 | # vocal-tract length perturbation, and then deresolution to bring to correct size. 152 | # The current implementation applies the deresolution to the power spectrum rather than 153 | # directly to the STFT. 154 | model_dir, history = spdl.envclassifier_extract_train( 155 | model_name = 'augment_builtin_speechcommands', 156 | audiodata_path = data_dir, 157 | augment_dict = augmentation_all, 158 | labeled_data = True, 159 | batch_size = 1, 160 | epochs = 50, 161 | patience = 5, 162 | visualize = True, 163 | vis_every_n_items = 1, 164 | **get_feats_kwargs) 165 | 166 | ############################################################# 167 | # Let's plot how the model performed (on this small dataset) 168 | plt.clf() 169 | plt.plot(history.history['accuracy']) 170 | plt.plot(history.history['val_accuracy']) 171 | plt.title('model accuracy') 172 | plt.ylabel('accuracy') 173 | plt.xlabel('epoch') 174 | plt.legend(['train', 'val'], loc='upper right') 175 | plt.savefig('accuracy.png') 176 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/examples/plot_featureprep_denoiser.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ======================================================= 4 | Feature Extraction for Denoising: Clean and Noisy Audio 5 | ======================================================= 6 | 7 | Extract acoustic features from clean and noisy datasets for 8 | training a denoising model, e.g. a denoising autoencoder. 9 | 10 | To see how soundpy implements this, see `soundpy.builtin.denoiser_feats`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | 17 | ##################################################################### 18 | import os, sys 19 | import inspect 20 | currentdir = os.path.dirname(os.path.abspath( 21 | inspect.getfile(inspect.currentframe()))) 22 | parentdir = os.path.dirname(currentdir) 23 | parparentdir = os.path.dirname(parentdir) 24 | packagedir = os.path.dirname(parparentdir) 25 | sys.path.insert(0, packagedir) 26 | 27 | import soundpy as sp 28 | import IPython.display as ipd 29 | package_dir = '../../../' 30 | os.chdir(package_dir) 31 | sp_dir = package_dir 32 | 33 | ###################################################### 34 | # Prepare for Extraction: Data Organization 35 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 36 | 37 | ###################################################### 38 | # I will use a mini denoising dataset as an example 39 | 40 | # Example noisy data: 41 | data_noisy_dir = '{}../mini-audio-datasets/denoise/noisy'.format(sp_dir) 42 | # Example clean data: 43 | data_clean_dir = '{}../mini-audio-datasets/denoise/clean'.format(sp_dir) 44 | # Where to save extracted features: 45 | data_features_dir = './audiodata/example_feats_models/denoiser/' 46 | 47 | ###################################################### 48 | # Choose Feature Type 49 | # ~~~~~~~~~~~~~~~~~~~ 50 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'. 51 | # if you are working with speech, I suggest 'fbank', 'powspec', or 'stft'. 52 | 53 | feature_type = 'stft' 54 | sr = 22050 55 | 56 | ###################################################### 57 | # Set Duration of Audio 58 | # ~~~~~~~~~~~~~~~~~~~~~ 59 | # How much audio in seconds used from each audio file. 60 | # the speech samples are about 3 seconds long. 61 | dur_sec = 3 62 | 63 | ####################################################################### 64 | # Option 1: Built-In Functionality: soundpy does everything for you 65 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 66 | 67 | ############################################################ 68 | # Define which data to use and which features to extract. 69 | # NOTE: beacuse of the very small dataset, will set 70 | # `perc_train` to a lower level than 0.8. (Otherwise, will raise error) 71 | # Everything else is based on defaults. A feature folder with 72 | # the feature data will be created in the current working directory. 73 | # (Although, you can set this under the parameter `data_features_dir`) 74 | # `visualize` saves periodic images of the features extracted. 75 | # This is useful if you want to know what's going on during the process. 76 | perc_train = 0.6 # with larger datasets this would be around 0.8 77 | extraction_dir = sp.denoiser_feats( 78 | data_clean_dir = data_clean_dir, 79 | data_noisy_dir = data_noisy_dir, 80 | sr = sr, 81 | feature_type = feature_type, 82 | dur_sec = dur_sec, 83 | perc_train = perc_train, 84 | visualize=True); 85 | extraction_dir 86 | 87 | ################################################################ 88 | # The extracted features, extraction settings applied, and 89 | # which audio files were assigned to which datasets 90 | # will be saved in the `extraction_dir` directory 91 | 92 | 93 | ############################################################ 94 | # Logged Information 95 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 96 | # Let's have a look at the files in the extraction_dir. The files ending 97 | # with .npy extension contain the feature data; the .csv files contain 98 | # logged information. 99 | featfiles = list(extraction_dir.glob('*.*')) 100 | for f in featfiles: 101 | print(f.name) 102 | 103 | ############################################################ 104 | # Feature Settings 105 | # ~~~~~~~~~~~~~~~~~~ 106 | # Since much was conducted behind the scenes, it's nice to know how the features 107 | # were extracted, for example, the sample rate and number of frequency bins applied, etc. 108 | feat_settings = sp.utils.load_dict( 109 | extraction_dir.joinpath('log_extraction_settings.csv')) 110 | for key, value in feat_settings.items(): 111 | print(key, ' ---> ', value) 112 | 113 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/examples/plot_featureprep_envclassifier.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ===================================== 4 | Feature Extraction for Classification 5 | ===================================== 6 | 7 | Extract acoustic features from labeled data for 8 | training an environment or speech classifier. 9 | 10 | To see how soundpy implements this, see `soundpy.builtin.envclassifier_feats`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | 17 | 18 | ##################################################################### 19 | import os, sys 20 | import inspect 21 | currentdir = os.path.dirname(os.path.abspath( 22 | inspect.getfile(inspect.currentframe()))) 23 | parentdir = os.path.dirname(currentdir) 24 | parparentdir = os.path.dirname(parentdir) 25 | packagedir = os.path.dirname(parparentdir) 26 | sys.path.insert(0, packagedir) 27 | 28 | import soundpy as sp 29 | import IPython.display as ipd 30 | package_dir = '../../../' 31 | os.chdir(package_dir) 32 | sp_dir = package_dir 33 | 34 | ###################################################### 35 | # Prepare for Extraction: Data Organization 36 | # ----------------------------------------- 37 | 38 | ###################################################### 39 | # I will use a sample speech commands data set: 40 | 41 | ########################################################## 42 | # Designate path relevant for accessing audiodata 43 | data_dir = '{}../mini-audio-datasets/speech_commands/'.format(sp_dir) 44 | 45 | ###################################################### 46 | # Choose Feature Type 47 | # ~~~~~~~~~~~~~~~~~~~ 48 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'. 49 | # if you are working with speech, I suggest 'fbank', 'powspec', or 'stft'. 50 | 51 | feature_type = 'fbank' 52 | 53 | ###################################################### 54 | # Set Duration of Audio 55 | # ~~~~~~~~~~~~~~~~~~~~~ 56 | # How much audio in seconds used from each audio file. 57 | # The example noise and speech files are only 1 second long 58 | dur_sec = 1 59 | 60 | 61 | ############################################################# 62 | # Built-In Functionality - soundpy extracts the features for you 63 | # --------------------------------------------------------------- 64 | 65 | ############################################################ 66 | # Define which data to use and which features to extract 67 | # Everything else is based on defaults. A feature folder with 68 | # the feature data will be created in the current working directory. 69 | # (Although, you can set this under the parameter `data_features_dir`) 70 | # `visualize` saves periodic images of the features extracted. 71 | # This is useful if you want to know what's going on during the process. 72 | extraction_dir = sp.envclassifier_feats(data_dir, 73 | feature_type=feature_type, 74 | dur_sec=dur_sec, 75 | visualize=True); 76 | 77 | ################################################################ 78 | # The extracted features, extraction settings applied, and 79 | # which audio files were assigned to which datasets 80 | # will be saved in the following directory: 81 | extraction_dir 82 | 83 | ############################################################ 84 | # Logged Information 85 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 86 | # Let's have a look at the files in the extraction_dir. The files ending 87 | # with .npy extension contain the feature data; the .csv files contain 88 | # logged information. 89 | featfiles = list(extraction_dir.glob('*.*')) 90 | for f in featfiles: 91 | print(f.name) 92 | 93 | ############################################################ 94 | # Feature Settings 95 | # ~~~~~~~~~~~~~~~~~~ 96 | # Since much was conducted behind the scenes, it's nice to know how the features 97 | # were extracted, for example, the sample rate and number of frequency bins applied, etc. 98 | feat_settings = sp.utils.load_dict( 99 | extraction_dir.joinpath('log_extraction_settings.csv')) 100 | for key, value in feat_settings.items(): 101 | print(key, ' ---> ', value) 102 | 103 | 104 | ############################################################ 105 | # Labeled Data 106 | # ~~~~~~~~~~~~~~~~~~ 107 | # These are the labels and their encoded values: 108 | encode_dict = sp.utils.load_dict( 109 | extraction_dir.joinpath('dict_encode.csv')) 110 | for key, value in encode_dict.items(): 111 | print(key, ' ---> ', value) 112 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/examples/plot_filter_out_noise.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | """ 4 | =========================== 5 | Filter Out Background Noise 6 | =========================== 7 | 8 | Filter out background noise from noisy speech signals. 9 | 10 | To see how soundpy implements this, see `soundpy.builtin.filtersignal`. 11 | 12 | As a general note for filtering, the Wiener Filter is the default filter for soundpy. It seems to filter signals more consequently than the Band Spectral Subtraction Filter. 13 | """ 14 | 15 | 16 | ############################################################################################### 17 | # 18 | 19 | 20 | ##################################################################### 21 | 22 | # Let's import soundpy, and ipd for playing audio data 23 | import soundpy as sp 24 | import IPython.display as ipd 25 | 26 | 27 | ###################################################### 28 | # Define the noisy and clean speech audio files. 29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 30 | # Note: these files are available in the soundpy repo. 31 | # Designate path relevant for accessing audiodata 32 | sp_dir = '../../../' 33 | 34 | ########################################################## 35 | # Noise sample: 36 | noise = '{}audiodata/background_samples/traffic.wav'.format(sp_dir) 37 | noise = sp.string2pathlib(noise) 38 | speech = '{}audiodata/python.wav'.format(sp_dir) 39 | speech = sp.utils.string2pathlib(speech) 40 | 41 | ########################################################## 42 | # For filtering, we will set the sample rate to be quite high: 43 | sr = 48000 44 | 45 | ########################################################## 46 | # Create noisy speech signal as SNR 10 47 | noisy, snr_measured = sp.dsp.add_backgroundsound( 48 | speech, 49 | noise, 50 | sr = sr, 51 | snr = 10, 52 | total_len_sec = 2, 53 | pad_mainsound_sec = 0.5) 54 | 55 | ########################################################## 56 | # Hear and see the noisy speech 57 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 58 | 59 | ipd.Audio(noisy,rate=sr) 60 | 61 | ########################################################## 62 | sp.plotsound(noisy, sr=sr, feature_type='signal', 63 | title = 'Noisy Speech', subprocess=True) 64 | 65 | 66 | ########################################################## 67 | # Hear and see the clean speech 68 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 | s, sr = sp.loadsound(speech, sr=sr) 70 | ipd.Audio(s,rate=sr) 71 | 72 | ########################################################## 73 | sp.plotsound(s, sr=sr, feature_type='signal', 74 | title = 'Clean Speech', subprocess=True) 75 | 76 | 77 | ########################################################## 78 | # Filter the noisy speech 79 | # ^^^^^^^^^^^^^^^^^^^^^^^ 80 | 81 | ########################################################## 82 | # Wiener Filter 83 | # ~~~~~~~~~~~~~ 84 | 85 | ########################################################## 86 | # Let's filter with a Wiener filter: 87 | noisy_wf, sr = sp.filtersignal(noisy, 88 | sr = sr, 89 | filter_type = 'wiener') # default 90 | 91 | ########################################################## 92 | ipd.Audio(noisy_wf,rate=sr) 93 | 94 | ########################################################## 95 | sp.plotsound(noisy_wf, sr = sr, feature_type = 'signal', 96 | title = 'Noisy Speech: Wiener Filter', 97 | subprocess=True) 98 | 99 | ################################################################# 100 | # Wiener Filter with Postfilter 101 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 102 | 103 | ########################################################## 104 | # Let's filter with a Wiener filter and postfilter 105 | noisy_wfpf, sr = sp.filtersignal(noisy, 106 | sr = sr, 107 | filter_type = 'wiener', 108 | apply_postfilter = True) 109 | 110 | ########################################################## 111 | ipd.Audio(noisy_wfpf,rate=sr) 112 | 113 | ########################################################## 114 | sp.plotsound(noisy_wfpf, sr=sr, feature_type = 'signal', 115 | title = 'Noisy Speech: Wiener Filter with Postfilter', 116 | subprocess=True) 117 | 118 | ################################################################# 119 | # Band Spectral Subtraction 120 | # ~~~~~~~~~~~~~~~~~~~~~~~~~ 121 | 122 | ########################################################## 123 | # Let's filter using band spectral subtraction 124 | noisy_bs, sr = sp.filtersignal(noisy, 125 | sr = sr, 126 | filter_type = 'bandspec') 127 | 128 | ########################################################## 129 | ipd.Audio(noisy_bs,rate=sr) 130 | 131 | ########################################################## 132 | sp.plotsound(noisy_bs, sr = sr, feature_type = 'signal', 133 | title = 'Noisy Speech: Band Spectral Subtraction', 134 | subprocess=True) 135 | 136 | 137 | ################################################################# 138 | # Band Spectral Subtraction with Postfilter 139 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 140 | 141 | ######################################################################### 142 | # Finally, let's filter using band spectral subtraction with a postfilter 143 | noisy_bspf, sr = sp.filtersignal(noisy, 144 | sr = sr, 145 | filter_type = 'bandspec', 146 | apply_postfilter = True) 147 | 148 | ########################################################## 149 | ipd.Audio(noisy_bspf,rate=sr) 150 | 151 | ########################################################## 152 | sp.plotsound(noisy_bspf, sr = sr, feature_type = 'signal', 153 | title = 'Noisy Speech: Band Spectral Subtraction with Postfilter', 154 | subprocess=True) 155 | 156 | 157 | ########################################################## 158 | # Filter: increase the scale 159 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^ 160 | 161 | ########################################################## 162 | # Let's filter with a Wiener filter: 163 | filter_scale = 5 164 | noisy_wf, sr = sp.filtersignal(noisy, 165 | sr=sr, 166 | filter_type = 'wiener', 167 | filter_scale = filter_scale) 168 | 169 | ########################################################## 170 | # Wiener Filter 171 | # ~~~~~~~~~~~~~ 172 | 173 | ########################################################## 174 | ipd.Audio(noisy_wf,rate=sr) 175 | 176 | ########################################################## 177 | sp.plotsound(noisy_wf, sr = sr, feature_type = 'signal', 178 | title = 'Noisy Speech: Wiener Filter Scale {}'.format(filter_scale), 179 | subprocess=True) 180 | 181 | ################################################################# 182 | # Wiener Filter with Postfilter 183 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 184 | 185 | ########################################################## 186 | # Let's filter with a Wiener filter and postfilter 187 | noisy_wfpf, sr = sp.filtersignal(noisy, 188 | sr = sr, 189 | filter_type = 'wiener', 190 | apply_postfilter = True, 191 | filter_scale = filter_scale) 192 | 193 | ########################################################## 194 | ipd.Audio(noisy_wfpf,rate = sr) 195 | 196 | ########################################################## 197 | sp.plotsound(noisy_wfpf, sr = sr, feature_type = 'signal', 198 | title = 'Noisy Speech: Wiener Filter with Postfilter Scale {}'.format(filter_scale), 199 | subprocess=True) 200 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/examples/plot_implement_denoiser.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ================================= 4 | Implement a Denoising Autoencoder 5 | ================================= 6 | 7 | Implement denoising autoencoder to denoise a noisy speech signal. 8 | 9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_run`. 10 | """ 11 | 12 | 13 | ############################################################################################ 14 | # 15 | 16 | ##################################################################### 17 | # Let's import soundpy and other packages 18 | import soundpy as sp 19 | import numpy as np 20 | # for playing audio in this notebook: 21 | import IPython.display as ipd 22 | 23 | ##################################################################### 24 | # As well as the deep learning component of soundpy 25 | from soundpy import models as spdl 26 | 27 | ###################################################### 28 | # Prepare for Implementation: Data Organization 29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 30 | 31 | ########################################################## 32 | # Set path relevant for audio data for this example 33 | sp_dir = '../../../' 34 | 35 | ###################################################### 36 | # Set model pathway 37 | # ~~~~~~~~~~~~~~~~~ 38 | # Currently, this expects a model saved with weights, with a .h5 extension. 39 | # (See `model` below) 40 | 41 | ###################################################### 42 | # The soundpy repo offers a pre-trained denoiser, which we'll use. 43 | model = '{}audiodata/models/'.format(sp_dir)+\ 44 | 'denoiser/example_denoiser_stft.h5' 45 | # ensure is a pathlib.PosixPath object 46 | print(model) 47 | model = sp.utils.string2pathlib(model) 48 | model_dir = model.parent 49 | 50 | ######################################################### 51 | # What is in this folder? 52 | files = list(model_dir.glob('*.*')) 53 | for f in files: 54 | print(f.name) 55 | 56 | ###################################################### 57 | # Provide dictionary with feature extraction settings 58 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 59 | 60 | ######################################################### 61 | # If soundpy extracts features for you, a 'log_extraction_settings.csv' 62 | # file will be saved, which includes relevant feature settings for implementing 63 | # the model; see `soundpy.feats.save_features_datasets` 64 | feat_settings = sp.utils.load_dict( 65 | model_dir.joinpath('log_extraction_settings.csv')) 66 | for key, value in feat_settings.items(): 67 | print(key, ' --> ', value) 68 | # change objects that were string to original format 69 | import ast 70 | try: 71 | feat_settings[key] = ast.literal_eval(value) 72 | except ValueError: 73 | pass 74 | except SyntaxError: 75 | pass 76 | 77 | ######################################################### 78 | # For the purposes of plotting, let's use some of the settings defined: 79 | feature_type = feat_settings['feature_type'] 80 | sr = feat_settings['sr'] 81 | 82 | ###################################################### 83 | # Provide new audio for the denoiser to denoise! 84 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 85 | 86 | ######################################################### 87 | # We'll use sample speech from the soundpy repo: 88 | speech = sp.string2pathlib('{}audiodata/python.wav'.format(sp_dir)) 89 | s, sr = sp.loadsound(speech, sr=sr) 90 | 91 | ######################################################### 92 | # Let's add some white noise (10 SNR) 93 | s_n = sp.augment.add_white_noise(s, sr=sr, snr=10) 94 | 95 | ############################################################## 96 | # What does the noisy audio sound like? 97 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 98 | ipd.Audio(s_n,rate=sr) 99 | 100 | ############################################################## 101 | # What does the noisy audio look like? 102 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 103 | sp.plotsound(s_n, sr = sr, feature_type='signal', subprocess=True) 104 | 105 | ############################################################## 106 | # What does the clean audio sound like? 107 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 108 | ipd.Audio(s,rate=sr) 109 | 110 | ############################################################## 111 | # What does the clean audio look like? 112 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 113 | sp.plotsound(s, sr = sr, feature_type='signal', subprocess=True) 114 | 115 | ######################################################################### 116 | # Built-In Denoiser Functionality 117 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 118 | 119 | ############################################################## 120 | # We just need to feed the model path, the noisy sample path, and 121 | # the feature settings dictionary we looked at above. 122 | y, sr = spdl.denoiser_run(model, s_n, feat_settings) 123 | 124 | ########################################################## 125 | # How does the output sound? 126 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~ 127 | ipd.Audio(y,rate=sr) 128 | 129 | ########################################################## 130 | # How does is the output look? 131 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 132 | sp.plotsound(y, sr=sr, feature_type = feature_type, subprocess=True) 133 | 134 | ########################################################## 135 | # How do the features compare? 136 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 137 | 138 | ########################################################## 139 | # STFT features of the noisy input speech: 140 | sp.plotsound(s_n, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db', 141 | title = 'Noisy input: STFT features', subprocess=True) 142 | 143 | ########################################################## 144 | # STFT features of the output 145 | sp.plotsound(y, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db', 146 | title = 'Denoiser Output: STFT features', subprocess=True) 147 | 148 | ########################################################## 149 | # STFT features of the clean version of the audio: 150 | sp.plotsound(s, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db', 151 | title = 'Clean "target" audio: STFT features', subprocess=True) 152 | 153 | 154 | ########################################################## 155 | # It's not perfect but for a pretty simple implementation, the noise is gone 156 | # and you can hear the person speaking. Pretty cool! 157 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/examples/plot_signals_and_features.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ======================= 4 | Create and Plot Signals 5 | ======================= 6 | 7 | Create and plot signals / noise; combine them at a specific SNR. 8 | 9 | To see how soundpy implements this, see `soundpy.dsp.generate_sound`, 10 | `soundpy.dsp.generate_noise` and `soundpy.dsp.add_backgroundsound`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | 17 | ##################################################################### 18 | # Let's import soundpy 19 | import soundpy as sp 20 | 21 | ########################################################################### 22 | # Create a Signal 23 | # ^^^^^^^^^^^^^^^ 24 | 25 | ######################################################################## 26 | # First let's set what sample rate we want to use 27 | sr = 44100 28 | 29 | 30 | ######################################################################### 31 | # Let's create a signal of 10 Hz 32 | sig1_hz = 10 33 | sig1, sr = sp.generate_sound(freq=sig1_hz, amplitude = 0.4, sr=sr, dur_sec=1) 34 | sp.plotsound(sig1, sr=sr, feature_type = 'signal', 35 | title = 'Signal: {} Hz'.format(sig1_hz), subprocess=True) 36 | 37 | 38 | ######################################################################### 39 | # Let's create a signal of 20 Hz 40 | sig2_hz = 20 41 | sig2, sr = sp.generate_sound(freq=sig2_hz, amplitude= 0.4, sr=sr, dur_sec=1) 42 | sp.plotsound(sig2, sr=sr, feature_type = 'signal', 43 | title = 'Signal: {} Hz'.format(sig2_hz), subprocess=True) 44 | 45 | ########################################################################### 46 | # Combine Signals 47 | # ^^^^^^^^^^^^^^^ 48 | 49 | 50 | ######################################################################### 51 | # Add them together and see what they look like: 52 | sig3 = sig1 + sig2 53 | sp.plotsound(sig3, sr=sr, feature_type = 'signal', 54 | title='Mixed Signals: {} Hz + {} Hz'.format(sig1_hz, sig2_hz), 55 | subprocess=True) 56 | 57 | 58 | ########################################################################## 59 | # Generate Pseudo-Random Noise 60 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 61 | 62 | 63 | ######################################################################### 64 | # Create noise to add to the signal: 65 | noise = sp.generate_noise(len(sig3), amplitude=0.02, random_seed=40) 66 | sp.plotsound(noise, sr=sr, feature_type = 'signal', 67 | title='Random Noise', subprocess=True) 68 | 69 | ########################################################################### 70 | # Control SNR: Adding a Background Sound 71 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 72 | 73 | ######################################################################### 74 | # Add noise at signal-to-noise ratio of 40 75 | sig_noisy, snr = sp.dsp.add_backgroundsound( 76 | audio_main = sig3, 77 | audio_background = noise, 78 | sr = sr, 79 | snr = 40, 80 | clip_at_zero = False) 81 | 82 | # keep energy between 1 and -1 83 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 84 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 40 SNR', 85 | subprocess=True) 86 | 87 | ######################################################################### 88 | # Add noise at signal-to-noise ratio of 20 89 | sig_noisy, snr = sp.dsp.add_backgroundsound( 90 | audio_main = sig3, 91 | audio_background = noise, 92 | sr = sr, 93 | snr = 20) 94 | # keep energy between 1 and -1 95 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 96 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 20 SNR', 97 | subprocess=True) 98 | 99 | ######################################################################### 100 | # Add noise at signal-to-noise ratio of 10 101 | sig_noisy, snr = sp.dsp.add_backgroundsound( 102 | audio_main = sig3, 103 | audio_background = noise, 104 | sr = sr, 105 | snr = 10) 106 | # keep energy between 1 and -1 107 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 108 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 10 SNR', 109 | subprocess=True) 110 | 111 | ######################################################################### 112 | # Add noise at signal-to-noise ratio of 0 113 | sig_noisy, snr = sp.dsp.add_backgroundsound( 114 | audio_main = sig3, 115 | audio_background = noise, 116 | sr = sr, 117 | snr = 0) 118 | # keep energy between 1 and -1 119 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 120 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 0 SNR', 121 | subprocess=True) 122 | 123 | 124 | ######################################################################### 125 | # Add noise at signal-to-noise ratio of -10 126 | sig_noisy, snr = sp.dsp.add_backgroundsound( 127 | audio_main = sig3, 128 | audio_background = noise, 129 | sr = sr, 130 | snr = -10) 131 | # keep energy between 1 and -1 132 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 133 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: -10 SNR', 134 | subprocess=True) 135 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/examples/plot_train_classifier.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ============================ 4 | Train an Acoustic Classifier 5 | ============================ 6 | 7 | Train an acoustic classifier on speech or noise features. 8 | 9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_train`. 10 | """ 11 | 12 | ############################################################################################### 13 | # 14 | import os, sys 15 | import inspect 16 | currentdir = os.path.dirname(os.path.abspath( 17 | inspect.getfile(inspect.currentframe()))) 18 | parentdir = os.path.dirname(currentdir) 19 | parparentdir = os.path.dirname(parentdir) 20 | packagedir = os.path.dirname(parparentdir) 21 | sys.path.insert(0, packagedir) 22 | 23 | import matplotlib.pyplot as plt 24 | import IPython.display as ipd 25 | package_dir = '../../../' 26 | os.chdir(package_dir) 27 | sp_dir = package_dir 28 | 29 | 30 | ##################################################################### 31 | # Let's import soundpy for handling sound 32 | import soundpy as sp 33 | ##################################################################### 34 | # As well as the deep learning component of soundpy 35 | from soundpy import models as spdl 36 | 37 | 38 | ###################################################### 39 | # Prepare for Training: Data Organization 40 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 41 | 42 | ########################################################## 43 | # Set path relevant for audio data for this example 44 | 45 | ###################################################### 46 | # I will load previously extracted features (from the Speech Commands Dataset) 47 | # See `soundpy.feats.save_features_datasets` or `soundpy.builtin.envclassifier_feats` 48 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\ 49 | 'envclassifier/example_feats_fbank/' 50 | 51 | ######################################################### 52 | # What is in this folder? 53 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir) 54 | files = list(feature_extraction_dir.glob('*.*')) 55 | for f in files: 56 | print(f.name) 57 | 58 | ######################################################### 59 | # The .npy files contain the features themselves, in train, validation, and 60 | # test datasets: 61 | files = list(feature_extraction_dir.glob('*.npy')) 62 | for f in files: 63 | print(f.name) 64 | 65 | ######################################################### 66 | # The .csv files contain information about how the features were extracted 67 | files = list(feature_extraction_dir.glob('*.csv')) 68 | for f in files: 69 | print(f.name) 70 | 71 | ######################################################### 72 | # We'll have a look at which features were extracted and other settings: 73 | feat_settings = sp.utils.load_dict( 74 | feature_extraction_dir.joinpath('log_extraction_settings.csv')) 75 | for key, value in feat_settings.items(): 76 | print(key, ' --> ', value) 77 | 78 | ######################################################### 79 | # For more about these settings, see `soundpy.feats.save_features_datasets`. 80 | 81 | ######################################################### 82 | # We'll have a look at the audio files that were assigned 83 | # to the train, val, and test datasets. 84 | audio_datasets = sp.utils.load_dict( 85 | feature_extraction_dir.joinpath('dataset_audiofiles.csv')) 86 | count = 0 87 | for key, value in audio_datasets.items(): 88 | print(key, ' --> ', value) 89 | count += 1 90 | if count > 5: 91 | break 92 | 93 | ############################################################# 94 | # Built-In Functionality: soundpy does everything for you 95 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 96 | # For more about this function, see `soundpy.models.builtin.envclassifier_train`. 97 | 98 | ############################################################# 99 | model_dir, history = spdl.envclassifier_train( 100 | feature_extraction_dir = feature_extraction_dir, 101 | epochs = 10, 102 | patience = 5) 103 | 104 | ############################################################# 105 | # Where the model and logs are located: 106 | model_dir 107 | 108 | ############################################################# 109 | # Let's plot how the model performed (on this mini dataset) 110 | import matplotlib.pyplot as plt 111 | plt.clf() 112 | plt.plot(history.history['accuracy']) 113 | plt.plot(history.history['val_accuracy']) 114 | plt.title('model accuracy') 115 | plt.ylabel('accuracy') 116 | plt.xlabel('epoch') 117 | plt.legend(['train', 'val'], loc='upper right') 118 | plt.savefig('accuracy.png') 119 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/examples/plot_train_denoiser.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ============================= 4 | Train a Denoising Autoencoder 5 | ============================= 6 | 7 | Train a denoising autoencoder with clean and noisy acoustic features. 8 | 9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_train`, 10 | `soundpy.builtin.denoiser_feats` and `soundpy.builtin.create_denoise_data`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | import os, sys 17 | import inspect 18 | currentdir = os.path.dirname(os.path.abspath( 19 | inspect.getfile(inspect.currentframe()))) 20 | parentdir = os.path.dirname(currentdir) 21 | parparentdir = os.path.dirname(parentdir) 22 | packagedir = os.path.dirname(parparentdir) 23 | sys.path.insert(0, packagedir) 24 | 25 | import matplotlib.pyplot as plt 26 | import IPython.display as ipd 27 | package_dir = '../../../' 28 | os.chdir(package_dir) 29 | sp_dir = package_dir 30 | 31 | 32 | ##################################################################### 33 | # Let's import soundpy for handling sound 34 | import soundpy as sp 35 | ##################################################################### 36 | # As well as the deep learning component of soundpy 37 | from soundpy import models as spdl 38 | 39 | 40 | ###################################################### 41 | # Prepare for Training: Data Organization 42 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 43 | 44 | ########################################################## 45 | # Designate path relevant for accessing audiodata 46 | 47 | 48 | ###################################################### 49 | # I will load previously extracted features (sample data), see `soundpy.feats.save_features_datasets` or `soundpy.builtin.denoiser_feats` 50 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\ 51 | 'denoiser/example_feats_fbank/' 52 | 53 | ######################################################### 54 | # What is in this folder? 55 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir) 56 | files = list(feature_extraction_dir.glob('*.*')) 57 | for f in files: 58 | print(f.name) 59 | 60 | ######################################################### 61 | # The .npy files contain the features themselves, in train, validation, and 62 | # test datasets: 63 | files = list(feature_extraction_dir.glob('*.npy')) 64 | for f in files: 65 | print(f.name) 66 | 67 | ######################################################### 68 | # The .csv files contain information about how the features were extracted 69 | files = list(feature_extraction_dir.glob('*.csv')) 70 | for f in files: 71 | print(f.name) 72 | 73 | ######################################################### 74 | # We'll have a look at which features were extracted and other settings: 75 | feat_settings = sp.utils.load_dict( 76 | feature_extraction_dir.joinpath('log_extraction_settings.csv')) 77 | for key, value in feat_settings.items(): 78 | print(key, ' --> ', value) 79 | 80 | ######################################################### 81 | # For more about these settings, see `soundpy.feats.save_features_datasets`. 82 | 83 | ######################################################### 84 | # We'll have a look at the audio files that were assigned 85 | # to the train, val, and test datasets. 86 | audio_datasets = sp.utils.load_dict( 87 | feature_extraction_dir.joinpath('audiofiles_datasets_clean.csv')) 88 | count = 0 89 | for key, value in audio_datasets.items(): 90 | print(key, ' --> ', value) 91 | count += 1 92 | if count > 5: 93 | break 94 | 95 | ############################################################# 96 | # Built-In Functionality: soundpy does everything for you 97 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 98 | # For more about this, see `soundpy.builtin.denoiser_train`. 99 | 100 | ############################################################# 101 | model_dir, history = spdl.denoiser_train( 102 | feature_extraction_dir = feature_extraction_dir, 103 | epochs = 10) 104 | 105 | ######################################################### 106 | 107 | 108 | ############################################################# 109 | # Where the model and logs are located: 110 | model_dir 111 | 112 | 113 | ############################################################# 114 | # Let's plot how the model performed (on this mini dataset) 115 | import matplotlib.pyplot as plt 116 | plt.plot(history.history['loss']) 117 | plt.plot(history.history['val_loss']) 118 | plt.title('model loss') 119 | plt.ylabel('loss') 120 | plt.xlabel('epoch') 121 | plt.legend(['train', 'val'], loc='upper right') 122 | plt.savefig('loss.png') 123 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/exceptions.rst: -------------------------------------------------------------------------------- 1 | 2 | Customized Errors 3 | ----------------- 4 | 5 | .. automodule:: soundpy.exceptions 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/feats.rst: -------------------------------------------------------------------------------- 1 | 2 | Extract and manipulate audio features 3 | ------------------------------------- 4 | 5 | .. automodule:: soundpy.feats 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/files.rst: -------------------------------------------------------------------------------- 1 | 2 | Working with audio files 3 | ------------------------ 4 | 5 | .. automodule:: soundpy.files 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/filters.rst: -------------------------------------------------------------------------------- 1 | 2 | Filters: Wiener and Band Spectral Subtraction 3 | --------------------------------------------- 4 | 5 | .. automodule:: soundpy.filters 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | .. autoclass:: soundpy.filters.FilterSettings 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | .. automethod:: __init__ 16 | 17 | .. autoclass:: soundpy.filters.Filter 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | .. automethod:: __init__ 23 | 24 | 25 | .. autoclass:: soundpy.filters.WienerFilter 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | .. automethod:: __init__ 31 | 32 | 33 | .. autoclass:: soundpy.filters.BandSubtraction 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | .. automethod:: __init__ 39 | 40 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/index.rst: -------------------------------------------------------------------------------- 1 | .. SoundPy documentation master file, created by 2 | sphinx-quickstart on Mon Jun 15 11:57:18 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | SoundPy v0.1.0a3 7 | ================ 8 | 9 | Welcome to the docs! 10 | -------------------- 11 | 12 | To access documentation for specific versions: 13 | 14 | .. toctree:: 15 | :maxdepth: 1 16 | 17 | versions.rst 18 | 19 | 20 | About SoundPy 21 | ------------- 22 | 23 | SoundPy is a research based Python package_ for exploring and experimenting with sound and deep learning. NOTE: SoundPy is in alpha stage of development; please forgive any bugs that pop up, and also feel free/encouraged to open an issue_. 24 | 25 | Those who might find this useful: 26 | 27 | * speech and sound enthusiasts 28 | * digital signal processing / mathematics / physics / acoustics enthusiasts 29 | * deep learning enthusiasts 30 | * researchers 31 | * linguists 32 | * psycholinguists 33 | 34 | The main goal of SoundPy is to provide the code and functionality with more context via visualization, research, and mathematics. Most of the resources used to build the functionality stems from publicly available research and datasets. 35 | 36 | As it covers quite a large range, from audio file conversion to implementation of trained neural networks, the purpose of SoundPy is not to be the perfect implementation of all functions (although that is also a goal :P ), but rather a peak into how they *can* be implemented, hopefully offering people a foundation for trying out different ways of implementation (feature extraction, building neural networks, etc.). 37 | 38 | This project is still in the beginning stages and has a lot of room for growth, especially with contributors having a background / knowlege in data science, computer science, machine and deep learning, physics, acoustics, or dsp. Contributors from other backgrounds are also welcome! If you'd like SoundPy to do something it doesn't, try making it or create an issue. 39 | 40 | .. _PyPI: https://pypi.org/project/soundpy/ 41 | 42 | .. _package: https://github.com/a-n-rose/Python-Sound-Tool/tree/development 43 | 44 | .. _issue: https://github.com/a-n-rose/Python-Sound-Tool/issues 45 | 46 | .. toctree:: 47 | :maxdepth: 2 48 | 49 | example_cases.rst 50 | readme.rst 51 | 52 | 53 | .. toctree:: 54 | :maxdepth: 1 55 | 56 | changelog.rst 57 | 58 | * :ref:`genindex` 59 | * :ref:`modindex` 60 | * :ref:`search` 61 | 62 | :Author: 63 | Aislyn Rose 64 | 65 | rose.aislyn.noelle@gmail.com 66 | 67 | webpage_ 68 | 69 | github_ 70 | 71 | .. _webpage: https://a-n-rose.github.io/ 72 | 73 | .. _github : https://github.com/a-n-rose 74 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/model_dataprep.rst: -------------------------------------------------------------------------------- 1 | 2 | Feeding large datasets to models 3 | -------------------------------- 4 | 5 | .. autoclass:: soundpy.models.dataprep.Generator 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | .. automethod:: __init__ 11 | 12 | 13 | .. automodule:: soundpy.models.dataprep 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/modelsetup.rst: -------------------------------------------------------------------------------- 1 | 2 | Additional model setup (e.g. Early Stopping) 3 | -------------------------------------------- 4 | 5 | .. automodule:: soundpy.models.modelsetup 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/modules.rst: -------------------------------------------------------------------------------- 1 | ============================== 2 | SoundPy Functionality v0.1.0a3 3 | ============================== 4 | 5 | .. include:: builtin_sp.rst 6 | 7 | .. include:: builtin_spdl.rst 8 | 9 | .. include:: augment.rst 10 | 11 | .. include:: files.rst 12 | 13 | .. include:: datasets.rst 14 | 15 | .. include:: dsp.rst 16 | 17 | .. include:: filters.rst 18 | 19 | .. include:: feats.rst 20 | 21 | .. include:: template_models.rst 22 | 23 | .. include:: modelsetup.rst 24 | 25 | .. include:: model_dataprep.rst 26 | 27 | .. include:: utils.rst 28 | 29 | .. include:: exceptions.rst 30 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: modules.rst 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/template_models.rst: -------------------------------------------------------------------------------- 1 | Template deep neural networks 2 | ----------------------------- 3 | 4 | .. automodule:: soundpy.models.template_models 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/utils.rst: -------------------------------------------------------------------------------- 1 | 2 | Other useful non-specific functionality 3 | --------------------------------------- 4 | 5 | .. automodule:: soundpy.utils 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/0.1.0a3/versions.rst: -------------------------------------------------------------------------------- 1 | ****************************************** 2 | SoundPy Versions Available as PyPI Package 3 | ****************************************** 4 | 5 | .. toctree:: 6 | :maxdepth: 1 7 | 8 | 0.1.0a2/index.rst 9 | 10 | -------------------------------------------------------------------------------- /docs/source/augment.rst: -------------------------------------------------------------------------------- 1 | 2 | Augment audio data 3 | ------------------ 4 | 5 | .. automodule:: soundpy.augment 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/builtin_sp.rst: -------------------------------------------------------------------------------- 1 | 2 | Built-In Functionality (non Deep Learning) 3 | ------------------------------------------ 4 | 5 | .. automodule:: soundpy.builtin 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/builtin_spdl.rst: -------------------------------------------------------------------------------- 1 | 2 | Built-In Functionality (Deep Learning) 3 | -------------------------------------- 4 | 5 | .. automodule:: soundpy.models.builtin 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/changelog.rst: -------------------------------------------------------------------------------- 1 | ********* 2 | Changelog 3 | ********* 4 | 5 | v0.1.0a 6 | ======= 7 | 8 | 9 | v0.1.0a3 10 | -------- 11 | 2021-04-09 12 | 13 | Bug fixes 14 | - no longer use Librosa for feature extraction: allow easier implementation of augmentations, especially during training. 15 | - `soundpy.feats.plot` now uses parameter `subprocess` to allow for different backends to be applied, depending on when funciton is called. For example, if plotting from within a Generator while training, `subprocess` should be set to True, and the 'Agg' backend will be applied. Otherwise, 'TkAgg' backend is used. Fixes issues with multi-threading. 16 | - Fixed generator and Tensorflow issue: with Tensorflow 2.2.0+ the models in `soundpy.models.builtin` that were trained via generator failed. Use `tensorflow.data.Dataset.from_generator` to feed generator data to models. 17 | - Improved `clip_at_zero`. 18 | 19 | Features 20 | - Python 3.8 can now be used. 21 | - throw depreciation warning for parameters `context_window` or `frames_per_sample` as these "features" will be removed from feature extraction. Rather the features can be reshaped post feature extraction. 22 | - added `timestep`, `axis_timestep`, `context_window`, `axis_context_window` and `combine_axes_0_1` paremeters to `soundpy.models.Generator`: allow more control over shape of the features. 23 | - can run `soundpy.models.builtin.envclassifier_extract_train` to run with pre-extracted val and test features. 24 | - `soundpy.feats.plotsound`, `soundpy.feats.plot_vad` and `soundpy.feats.plot_dom_freq` all can plot stereo sound: for each channel in a stereo signal, a plot is either generated or saved. If a filename already exists, a date stamp is added to filename to avoid overwriting images. 25 | - allow `grayscale2color` to be applied to 2D data. 26 | 27 | Breaking changes 28 | - `soundpy.models.Generator` uses parameter `normalize` instaed of `normalized`. Found this to be more intuitive. If `normalize` is set to True, data will be normalized. Before, if `normalized` was set to True, data would not be normalized. 29 | - removed `add_tensor_last` and `add_tensor_first`: require adding of tensors (for keras) to be included in parameter `desired_input_shape`. 30 | 31 | Other changes 32 | - CPU soundpy can use Tensorflow 2.1.0, 2.2.0 and 2.3.0. Dockerfile still uses Tensorflow 2.1.0 as it is still compatible with updated code. 33 | - `soundpy.models.builtin.implement_denoiser` raises warning if cleaned features cannot be converted to raw audio samples. 34 | 35 | 36 | v0.1.0a2 37 | -------- 38 | 2020-08-13 39 | 40 | 41 | Bug fixes 42 | - added `use_beg_ms` parameter in `soundpy.dsp.vad`: improved VAD recognition of silences post speech. 43 | 44 | Features 45 | - added GPU option: provide instructions and Docker image for running SoundPy with GPU 46 | - added `extend_window_ms` paremeter to `soundpy.feats.get_vad_samples` and `soundpy.feats.get_vad_stft`: can extend VAD window if desired. Useful in higher SNR environments. 47 | - added `soundpy.feats.get_samples_clipped` and `soundpy.feats.get_stft_clipped` to clip off beginning and ending silences. 48 | - added `beg_end_clipped` parameter to `soundpy.feats.plot_vad` to visualize VAD by clipping the beginning and ending silences (if True) or VAD instances throughout the signal (if False). 49 | - added `soundpy.models.dataprep.GeneratorFeatExtraction` class for extracting and augmenting features during training (still experimental). 50 | - added `soundpy.models.builtin.envclassifier_extract_train` as an example of extracting and augmenting features during training (still experimental). 51 | - added `soundpy.dsp.clip_at_zero` to enable smoother concatenations of signals and enables removal of clicks at beginning and ending of signals. 52 | - added `soundpy.dsp.remove_dc_bias` to enable smoother concatenations of signals 53 | - added and set `remove_dc` parameter to True in `soundpy.files.loadsound` and `soundpy.files.savesound` to ensure signals all have mean zero. 54 | - added `mirror_sound` option to `soundpy.dsp.apply_sample_length` as a way to extend sound. 55 | - added `soundpy.dsp.ismono` to check if samples were mono or stereo. 56 | - added `soundpy.dsp.average_channels` to average sample amplitudes across channels, e.g. to identify where high energy begins / ends in the signal without disregarding additional channels (if stereo sound). 57 | - added `soundpy.dsp.add_channels` for adding additional channels if needed (e.g. for applying a 'hann' or 'hamming' window to stereo sound) 58 | - added stereo sound functionality to `soundpy.dsp.add_backgroundsound`, `soundpy.dsp.clip_at_zero`, `soundpy.dsp.calc_fft`, `soundpy.feats.get_stft`, `soundpy.feats.get_vad_stft` 59 | 60 | 61 | Other changes 62 | - name change: from pysoundtool to soundpy: simpler 63 | - updated dependencies to newest versions still compatible with Tensorflow 2.1.0 64 | - moved `soundpy.dsp.get_vad_samples` to `soundpy.feats.get_vad_samples` 65 | - moved `soundpy.dsp.get_vad_stft` to `soundpy.feats.get_vad_stft` 66 | - name change: allow `soundpy.feats.normalize` to be used as `soundpy.normalize` 67 | - removed `pysoundtool_online` and mybinder button as maintaining the online version was not easily done. Aim to reimplement at some point. 68 | 69 | 70 | 71 | v0.1.0a1 72 | ======== 73 | 74 | Initial public alpha release. 75 | -------------------------------------------------------------------------------- /docs/source/datasets.rst: -------------------------------------------------------------------------------- 1 | 2 | Organizing datasets 3 | ------------------- 4 | 5 | .. automodule:: soundpy.datasets 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/dsp.rst: -------------------------------------------------------------------------------- 1 | 2 | Working with signals 3 | -------------------- 4 | 5 | .. automodule:: soundpy.dsp 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/example_cases.rst: -------------------------------------------------------------------------------- 1 | 2 | .. toctree:: 3 | :maxdepth: 2 4 | 5 | .. include:: auto_examples/index.rst 6 | -------------------------------------------------------------------------------- /docs/source/examples/README.txt: -------------------------------------------------------------------------------- 1 | 2 | ----------------------------- 3 | SoundPy Example Use Cases 4 | ----------------------------- 5 | -------------------------------------------------------------------------------- /docs/source/examples/plot_dataset_info_formatting.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ======================================== 4 | Audio Dataset Exploration and Formatting 5 | ======================================== 6 | 7 | Examine audio files within a dataset, and reformat them if desired. 8 | 9 | To see how soundpy implements this, see `soundpy.builtin.dataset_logger` and 10 | `soundpy.builtin.dataset_formatter`. 11 | """ 12 | 13 | ##################################################################### 14 | # Let's import soundpy 15 | import soundpy as sp 16 | 17 | ############################################################################################### 18 | # 19 | # Dataset Exploration 20 | # ^^^^^^^^^^^^^^^^^^^ 21 | 22 | ########################################################## 23 | # Designate path relevant for accessing audiodata 24 | sp_dir = '../../../' 25 | 26 | ########################################################## 27 | # I will explore files in a small dataset on my computer with varying file formats. 28 | dataset_path = '{}audiodata2/'.format(sp_dir) 29 | dataset_info_dict = sp.builtin.dataset_logger('{}audiodata2/'.format(sp_dir)); 30 | 31 | ######################################################################### 32 | # This returns our data in a dictionary, perfect for exploring via Pandas 33 | import pandas as pd 34 | all_data = pd.DataFrame(dataset_info_dict).T 35 | all_data.head() 36 | 37 | ################################### 38 | # Let's have a look at the audio files and how uniform they are: 39 | print('formats: ', all_data.format_type.unique()) 40 | print('bitdepth (types): ', all_data.bitdepth.unique()) 41 | print('mean duration (sec): ', all_data.dur_sec.mean()) 42 | print('std dev duration (sec): ', all_data.dur_sec.std()) 43 | print('min sample rate: ', all_data.sr.min()) 44 | print('max sample rate: ', all_data.sr.max()) 45 | print('number of channels: ', all_data.num_channels.unique()) 46 | 47 | 48 | ########################################################## 49 | # For a visual example, let's plot the count of various sample rates. (48000 Hz is high definition sound, 16000 Hz is wideband, and 8000 Hz is narrowband, similar to how speech sounds on the telephone.) 50 | all_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts') 51 | 52 | ############################################################################################### 53 | # Reformat a Dataset 54 | # ^^^^^^^^^^^^^^^^^^ 55 | 56 | ############################################################## 57 | # Let's say we have a dataset that we want to make consistent. 58 | # We can do that with soundpy 59 | new_dataset_dir = sp.builtin.dataset_formatter( 60 | dataset_path, 61 | recursive = True, # we want all the audio, even in nested directories 62 | format='WAV', 63 | bitdepth = 16, # if set to None, a default bitdepth will be applied 64 | sr = 16000, # wideband 65 | mono = True, # ensure data all have 1 channel 66 | dur_sec = 3, # audio will be limited to 3 seconds 67 | zeropad = True, # audio shorter than 3 seconds will be zeropadded 68 | new_dir = './example_dir/', # if None, a time-stamped directory will be created for you 69 | overwrite = False # can set to True if you want to overwrite files 70 | ); 71 | 72 | ############################################### 73 | # Let's see what the audio data looks like now: 74 | dataset_formatted_dict = sp.builtin.dataset_logger(new_dataset_dir, recursive=True); 75 | formatted_data = pd.DataFrame(dataset_formatted_dict).T 76 | 77 | ##################### 78 | formatted_data.head() 79 | 80 | ################################### 81 | print('audio formats: ', formatted_data.format_type.unique()) 82 | print('bitdepth (types): ', formatted_data.bitdepth.unique()) 83 | print('mean duration (sec): ', formatted_data.dur_sec.mean()) 84 | print('std dev duration (sec): ', formatted_data.dur_sec.std()) 85 | print('min sample rate: ', formatted_data.sr.min()) 86 | print('max sample rate: ', formatted_data.sr.max()) 87 | print('number of channels: ', formatted_data.num_channels.unique()) 88 | 89 | ########################################################## 90 | # Now all the audio data is sampled at the same rate: 8000 Hz 91 | formatted_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts') 92 | 93 | ########################################### 94 | # There we go! 95 | # You can reformat only parts of the audio files, e.g. format or bitdepth. 96 | # If you leave parameters in sp.builtin.dataset_formatter as None, the original 97 | # settings of the audio file will be maintained (except for bitdepth. 98 | # A default bitdepth will be applied according to the format of the file); see `soundfile.default_subtype`. 99 | -------------------------------------------------------------------------------- /docs/source/examples/plot_extract_augment_train_classifier.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ================================================== 4 | Extract, Augment, and Train an Acoustic Classifier 5 | ================================================== 6 | 7 | Extract and augment features as an acoustic classifier is trained on speech. 8 | 9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_extract_train`. 10 | """ 11 | 12 | ############################################################################################### 13 | # 14 | 15 | import os, sys 16 | import inspect 17 | currentdir = os.path.dirname(os.path.abspath( 18 | inspect.getfile(inspect.currentframe()))) 19 | parentdir = os.path.dirname(currentdir) 20 | parparentdir = os.path.dirname(parentdir) 21 | packagedir = os.path.dirname(parparentdir) 22 | sys.path.insert(0, packagedir) 23 | 24 | import matplotlib.pyplot as plt 25 | import IPython.display as ipd 26 | package_dir = '../../../' 27 | os.chdir(package_dir) 28 | sp_dir = package_dir 29 | 30 | 31 | ##################################################################### 32 | # Let's import soundpy for handling sound 33 | import soundpy as sp 34 | ##################################################################### 35 | # As well as the deep learning component of soundpy 36 | from soundpy import models as spdl 37 | 38 | 39 | ###################################################### 40 | # Prepare for Training: Data Organization 41 | # ======================================= 42 | 43 | ###################################################### 44 | # I will use a sample speech commands data set: 45 | 46 | ########################################################## 47 | # Designate path relevant for accessing audiodata 48 | data_dir = '{}../mini-audio-datasets/speech_commands/'.format(sp_dir) 49 | 50 | 51 | ###################################################### 52 | # Setup a Feature Settings Dictionary 53 | # ----------------------------------- 54 | 55 | 56 | feature_type = 'fbank' 57 | num_filters = 40 58 | rate_of_change = False 59 | rate_of_acceleration = False 60 | dur_sec = 1 61 | win_size_ms = 25 62 | percent_overlap = 0.5 63 | sr = 22050 64 | fft_bins = None 65 | num_mfcc = None 66 | real_signal = True 67 | 68 | get_feats_kwargs = dict(feature_type = feature_type, 69 | sr = sr, 70 | dur_sec = dur_sec, 71 | win_size_ms = win_size_ms, 72 | percent_overlap = percent_overlap, 73 | fft_bins = fft_bins, 74 | num_filters = num_filters, 75 | num_mfcc = num_mfcc, 76 | rate_of_change = rate_of_change, 77 | rate_of_acceleration = rate_of_acceleration, 78 | real_signal = real_signal) 79 | 80 | ###################################################### 81 | # Setup an Augmentation Dictionary 82 | # -------------------------------- 83 | # This will apply augmentations at random at each epoch. 84 | augmentation_all = dict([('add_white_noise',True), 85 | ('speed_decrease', True), 86 | ('speed_increase', True), 87 | ('pitch_decrease', True), 88 | ('pitch_increase', True), 89 | ('harmonic_distortion', True), 90 | ('vtlp', True) 91 | ]) 92 | 93 | ########################################################## 94 | # see the default values for these augmentations 95 | augment_settings_dict = {} 96 | for key in augmentation_all.keys(): 97 | augment_settings_dict[key] = sp.augment.get_augmentation_settings_dict(key) 98 | for key, value in augment_settings_dict.items(): 99 | print(key, ' : ', value) 100 | 101 | ########################################################## 102 | # Adjust Augmentation Defaults 103 | # ---------------------------- 104 | 105 | 106 | ########################################################## 107 | # Adjust Add White Noise 108 | # ~~~~~~~~~~~~~~~~~~~~~~ 109 | # I want the SNR of the white noise to vary between several: 110 | # SNR 10, 15, and 20. 111 | augment_settings_dict['add_white_noise']['snr'] = [10,15,20] 112 | 113 | ########################################################## 114 | # Adjust Pitch Decrease 115 | # ~~~~~~~~~~~~~~~~~~~~~ 116 | # I found the pitch changes too exaggerated, so I will 117 | # set those to 1 instead of 2 semitones. 118 | augment_settings_dict['pitch_decrease']['num_semitones'] = 1 119 | 120 | ########################################################## 121 | # Adjust Pitch Increase 122 | # ~~~~~~~~~~~~~~~~~~~~~ 123 | augment_settings_dict['pitch_increase']['num_semitones'] = 1 124 | 125 | ########################################################## 126 | # Adjust Speed Decrease 127 | # ~~~~~~~~~~~~~~~~~~~~~ 128 | augment_settings_dict['speed_decrease']['perc'] = 0.1 129 | 130 | ########################################################## 131 | # Adjust Speed Increase 132 | # ~~~~~~~~~~~~~~~~~~~~~ 133 | augment_settings_dict['speed_increase']['perc'] = 0.1 134 | 135 | 136 | ###################################################### 137 | # Update an Augmentation Dictionary 138 | # --------------------------------- 139 | # We'll include in the dictionary the settings we want for augmentations: 140 | augmentation_all.update( 141 | dict(augment_settings_dict = augment_settings_dict)) 142 | 143 | 144 | ###################################################### 145 | # Train the Model 146 | # =============== 147 | # Note: disregard the warning: 148 | # WARNING: Only the power spectrum of the VTLP augmented signal can be returned due to resizing the augmentation from (56, 4401) to (79, 276) 149 | # 150 | # This is due to the hyper frequency resolution applied to the audio during 151 | # vocal-tract length perturbation, and then deresolution to bring to correct size. 152 | # The current implementation applies the deresolution to the power spectrum rather than 153 | # directly to the STFT. 154 | model_dir, history = spdl.envclassifier_extract_train( 155 | model_name = 'augment_builtin_speechcommands', 156 | audiodata_path = data_dir, 157 | augment_dict = augmentation_all, 158 | labeled_data = True, 159 | batch_size = 1, 160 | epochs = 50, 161 | patience = 5, 162 | visualize = True, 163 | vis_every_n_items = 1, 164 | **get_feats_kwargs) 165 | 166 | ############################################################# 167 | # Let's plot how the model performed (on this small dataset) 168 | plt.clf() 169 | plt.plot(history.history['accuracy']) 170 | plt.plot(history.history['val_accuracy']) 171 | plt.title('model accuracy') 172 | plt.ylabel('accuracy') 173 | plt.xlabel('epoch') 174 | plt.legend(['train', 'val'], loc='upper right') 175 | plt.savefig('accuracy.png') 176 | -------------------------------------------------------------------------------- /docs/source/examples/plot_featureprep_denoiser.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ======================================================= 4 | Feature Extraction for Denoising: Clean and Noisy Audio 5 | ======================================================= 6 | 7 | Extract acoustic features from clean and noisy datasets for 8 | training a denoising model, e.g. a denoising autoencoder. 9 | 10 | To see how soundpy implements this, see `soundpy.builtin.denoiser_feats`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | 17 | ##################################################################### 18 | import os, sys 19 | import inspect 20 | currentdir = os.path.dirname(os.path.abspath( 21 | inspect.getfile(inspect.currentframe()))) 22 | parentdir = os.path.dirname(currentdir) 23 | parparentdir = os.path.dirname(parentdir) 24 | packagedir = os.path.dirname(parparentdir) 25 | sys.path.insert(0, packagedir) 26 | 27 | import soundpy as sp 28 | import IPython.display as ipd 29 | package_dir = '../../../' 30 | os.chdir(package_dir) 31 | sp_dir = package_dir 32 | 33 | ###################################################### 34 | # Prepare for Extraction: Data Organization 35 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 36 | 37 | ###################################################### 38 | # I will use a mini denoising dataset as an example 39 | 40 | # Example noisy data: 41 | data_noisy_dir = '{}../mini-audio-datasets/denoise/noisy'.format(sp_dir) 42 | # Example clean data: 43 | data_clean_dir = '{}../mini-audio-datasets/denoise/clean'.format(sp_dir) 44 | # Where to save extracted features: 45 | data_features_dir = './audiodata/example_feats_models/denoiser/' 46 | 47 | ###################################################### 48 | # Choose Feature Type 49 | # ~~~~~~~~~~~~~~~~~~~ 50 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'. 51 | # if you are working with speech, I suggest 'fbank', 'powspec', or 'stft'. 52 | 53 | feature_type = 'stft' 54 | sr = 22050 55 | 56 | ###################################################### 57 | # Set Duration of Audio 58 | # ~~~~~~~~~~~~~~~~~~~~~ 59 | # How much audio in seconds used from each audio file. 60 | # the speech samples are about 3 seconds long. 61 | dur_sec = 3 62 | 63 | ####################################################################### 64 | # Option 1: Built-In Functionality: soundpy does everything for you 65 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 66 | 67 | ############################################################ 68 | # Define which data to use and which features to extract. 69 | # NOTE: beacuse of the very small dataset, will set 70 | # `perc_train` to a lower level than 0.8. (Otherwise, will raise error) 71 | # Everything else is based on defaults. A feature folder with 72 | # the feature data will be created in the current working directory. 73 | # (Although, you can set this under the parameter `data_features_dir`) 74 | # `visualize` saves periodic images of the features extracted. 75 | # This is useful if you want to know what's going on during the process. 76 | perc_train = 0.6 # with larger datasets this would be around 0.8 77 | extraction_dir = sp.denoiser_feats( 78 | data_clean_dir = data_clean_dir, 79 | data_noisy_dir = data_noisy_dir, 80 | sr = sr, 81 | feature_type = feature_type, 82 | dur_sec = dur_sec, 83 | perc_train = perc_train, 84 | visualize=True); 85 | extraction_dir 86 | 87 | ################################################################ 88 | # The extracted features, extraction settings applied, and 89 | # which audio files were assigned to which datasets 90 | # will be saved in the `extraction_dir` directory 91 | 92 | 93 | ############################################################ 94 | # Logged Information 95 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 96 | # Let's have a look at the files in the extraction_dir. The files ending 97 | # with .npy extension contain the feature data; the .csv files contain 98 | # logged information. 99 | featfiles = list(extraction_dir.glob('*.*')) 100 | for f in featfiles: 101 | print(f.name) 102 | 103 | ############################################################ 104 | # Feature Settings 105 | # ~~~~~~~~~~~~~~~~~~ 106 | # Since much was conducted behind the scenes, it's nice to know how the features 107 | # were extracted, for example, the sample rate and number of frequency bins applied, etc. 108 | feat_settings = sp.utils.load_dict( 109 | extraction_dir.joinpath('log_extraction_settings.csv')) 110 | for key, value in feat_settings.items(): 111 | print(key, ' ---> ', value) 112 | 113 | -------------------------------------------------------------------------------- /docs/source/examples/plot_featureprep_envclassifier.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ===================================== 4 | Feature Extraction for Classification 5 | ===================================== 6 | 7 | Extract acoustic features from labeled data for 8 | training an environment or speech classifier. 9 | 10 | To see how soundpy implements this, see `soundpy.builtin.envclassifier_feats`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | 17 | 18 | ##################################################################### 19 | import os, sys 20 | import inspect 21 | currentdir = os.path.dirname(os.path.abspath( 22 | inspect.getfile(inspect.currentframe()))) 23 | parentdir = os.path.dirname(currentdir) 24 | parparentdir = os.path.dirname(parentdir) 25 | packagedir = os.path.dirname(parparentdir) 26 | sys.path.insert(0, packagedir) 27 | 28 | import soundpy as sp 29 | import IPython.display as ipd 30 | package_dir = '../../../' 31 | os.chdir(package_dir) 32 | sp_dir = package_dir 33 | 34 | ###################################################### 35 | # Prepare for Extraction: Data Organization 36 | # ----------------------------------------- 37 | 38 | ###################################################### 39 | # I will use a sample speech commands data set: 40 | 41 | ########################################################## 42 | # Designate path relevant for accessing audiodata 43 | data_dir = '{}../mini-audio-datasets/speech_commands/'.format(sp_dir) 44 | 45 | ###################################################### 46 | # Choose Feature Type 47 | # ~~~~~~~~~~~~~~~~~~~ 48 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'. 49 | # if you are working with speech, I suggest 'fbank', 'powspec', or 'stft'. 50 | 51 | feature_type = 'fbank' 52 | 53 | ###################################################### 54 | # Set Duration of Audio 55 | # ~~~~~~~~~~~~~~~~~~~~~ 56 | # How much audio in seconds used from each audio file. 57 | # The example noise and speech files are only 1 second long 58 | dur_sec = 1 59 | 60 | 61 | ############################################################# 62 | # Built-In Functionality - soundpy extracts the features for you 63 | # --------------------------------------------------------------- 64 | 65 | ############################################################ 66 | # Define which data to use and which features to extract 67 | # Everything else is based on defaults. A feature folder with 68 | # the feature data will be created in the current working directory. 69 | # (Although, you can set this under the parameter `data_features_dir`) 70 | # `visualize` saves periodic images of the features extracted. 71 | # This is useful if you want to know what's going on during the process. 72 | extraction_dir = sp.envclassifier_feats(data_dir, 73 | feature_type=feature_type, 74 | dur_sec=dur_sec, 75 | visualize=True); 76 | 77 | ################################################################ 78 | # The extracted features, extraction settings applied, and 79 | # which audio files were assigned to which datasets 80 | # will be saved in the following directory: 81 | extraction_dir 82 | 83 | ############################################################ 84 | # Logged Information 85 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 86 | # Let's have a look at the files in the extraction_dir. The files ending 87 | # with .npy extension contain the feature data; the .csv files contain 88 | # logged information. 89 | featfiles = list(extraction_dir.glob('*.*')) 90 | for f in featfiles: 91 | print(f.name) 92 | 93 | ############################################################ 94 | # Feature Settings 95 | # ~~~~~~~~~~~~~~~~~~ 96 | # Since much was conducted behind the scenes, it's nice to know how the features 97 | # were extracted, for example, the sample rate and number of frequency bins applied, etc. 98 | feat_settings = sp.utils.load_dict( 99 | extraction_dir.joinpath('log_extraction_settings.csv')) 100 | for key, value in feat_settings.items(): 101 | print(key, ' ---> ', value) 102 | 103 | 104 | ############################################################ 105 | # Labeled Data 106 | # ~~~~~~~~~~~~~~~~~~ 107 | # These are the labels and their encoded values: 108 | encode_dict = sp.utils.load_dict( 109 | extraction_dir.joinpath('dict_encode.csv')) 110 | for key, value in encode_dict.items(): 111 | print(key, ' ---> ', value) 112 | -------------------------------------------------------------------------------- /docs/source/examples/plot_filter_out_noise.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | """ 4 | =========================== 5 | Filter Out Background Noise 6 | =========================== 7 | 8 | Filter out background noise from noisy speech signals. 9 | 10 | To see how soundpy implements this, see `soundpy.builtin.filtersignal`. 11 | 12 | As a general note for filtering, the Wiener Filter is the default filter for soundpy. It seems to filter signals more consequently than the Band Spectral Subtraction Filter. 13 | """ 14 | 15 | 16 | ############################################################################################### 17 | # 18 | 19 | 20 | ##################################################################### 21 | 22 | # Let's import soundpy, and ipd for playing audio data 23 | import soundpy as sp 24 | import IPython.display as ipd 25 | 26 | 27 | ###################################################### 28 | # Define the noisy and clean speech audio files. 29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 30 | # Note: these files are available in the soundpy repo. 31 | # Designate path relevant for accessing audiodata 32 | sp_dir = '../../../' 33 | 34 | ########################################################## 35 | # Noise sample: 36 | noise = '{}audiodata/background_samples/traffic.wav'.format(sp_dir) 37 | noise = sp.string2pathlib(noise) 38 | speech = '{}audiodata/python.wav'.format(sp_dir) 39 | speech = sp.utils.string2pathlib(speech) 40 | 41 | ########################################################## 42 | # For filtering, we will set the sample rate to be quite high: 43 | sr = 48000 44 | 45 | ########################################################## 46 | # Create noisy speech signal as SNR 10 47 | noisy, snr_measured = sp.dsp.add_backgroundsound( 48 | speech, 49 | noise, 50 | sr = sr, 51 | snr = 10, 52 | total_len_sec = 2, 53 | pad_mainsound_sec = 0.5) 54 | 55 | ########################################################## 56 | # Hear and see the noisy speech 57 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 58 | 59 | ipd.Audio(noisy,rate=sr) 60 | 61 | ########################################################## 62 | sp.plotsound(noisy, sr=sr, feature_type='signal', 63 | title = 'Noisy Speech', subprocess=True) 64 | 65 | 66 | ########################################################## 67 | # Hear and see the clean speech 68 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 | s, sr = sp.loadsound(speech, sr=sr) 70 | ipd.Audio(s,rate=sr) 71 | 72 | ########################################################## 73 | sp.plotsound(s, sr=sr, feature_type='signal', 74 | title = 'Clean Speech', subprocess=True) 75 | 76 | 77 | ########################################################## 78 | # Filter the noisy speech 79 | # ^^^^^^^^^^^^^^^^^^^^^^^ 80 | 81 | ########################################################## 82 | # Wiener Filter 83 | # ~~~~~~~~~~~~~ 84 | 85 | ########################################################## 86 | # Let's filter with a Wiener filter: 87 | noisy_wf, sr = sp.filtersignal(noisy, 88 | sr = sr, 89 | filter_type = 'wiener') # default 90 | 91 | ########################################################## 92 | ipd.Audio(noisy_wf,rate=sr) 93 | 94 | ########################################################## 95 | sp.plotsound(noisy_wf, sr = sr, feature_type = 'signal', 96 | title = 'Noisy Speech: Wiener Filter', 97 | subprocess=True) 98 | 99 | ################################################################# 100 | # Wiener Filter with Postfilter 101 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 102 | 103 | ########################################################## 104 | # Let's filter with a Wiener filter and postfilter 105 | noisy_wfpf, sr = sp.filtersignal(noisy, 106 | sr = sr, 107 | filter_type = 'wiener', 108 | apply_postfilter = True) 109 | 110 | ########################################################## 111 | ipd.Audio(noisy_wfpf,rate=sr) 112 | 113 | ########################################################## 114 | sp.plotsound(noisy_wfpf, sr=sr, feature_type = 'signal', 115 | title = 'Noisy Speech: Wiener Filter with Postfilter', 116 | subprocess=True) 117 | 118 | ################################################################# 119 | # Band Spectral Subtraction 120 | # ~~~~~~~~~~~~~~~~~~~~~~~~~ 121 | 122 | ########################################################## 123 | # Let's filter using band spectral subtraction 124 | noisy_bs, sr = sp.filtersignal(noisy, 125 | sr = sr, 126 | filter_type = 'bandspec') 127 | 128 | ########################################################## 129 | ipd.Audio(noisy_bs,rate=sr) 130 | 131 | ########################################################## 132 | sp.plotsound(noisy_bs, sr = sr, feature_type = 'signal', 133 | title = 'Noisy Speech: Band Spectral Subtraction', 134 | subprocess=True) 135 | 136 | 137 | ################################################################# 138 | # Band Spectral Subtraction with Postfilter 139 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 140 | 141 | ######################################################################### 142 | # Finally, let's filter using band spectral subtraction with a postfilter 143 | noisy_bspf, sr = sp.filtersignal(noisy, 144 | sr = sr, 145 | filter_type = 'bandspec', 146 | apply_postfilter = True) 147 | 148 | ########################################################## 149 | ipd.Audio(noisy_bspf,rate=sr) 150 | 151 | ########################################################## 152 | sp.plotsound(noisy_bspf, sr = sr, feature_type = 'signal', 153 | title = 'Noisy Speech: Band Spectral Subtraction with Postfilter', 154 | subprocess=True) 155 | 156 | 157 | ########################################################## 158 | # Filter: increase the scale 159 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^ 160 | 161 | ########################################################## 162 | # Let's filter with a Wiener filter: 163 | filter_scale = 5 164 | noisy_wf, sr = sp.filtersignal(noisy, 165 | sr=sr, 166 | filter_type = 'wiener', 167 | filter_scale = filter_scale) 168 | 169 | ########################################################## 170 | # Wiener Filter 171 | # ~~~~~~~~~~~~~ 172 | 173 | ########################################################## 174 | ipd.Audio(noisy_wf,rate=sr) 175 | 176 | ########################################################## 177 | sp.plotsound(noisy_wf, sr = sr, feature_type = 'signal', 178 | title = 'Noisy Speech: Wiener Filter Scale {}'.format(filter_scale), 179 | subprocess=True) 180 | 181 | ################################################################# 182 | # Wiener Filter with Postfilter 183 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 184 | 185 | ########################################################## 186 | # Let's filter with a Wiener filter and postfilter 187 | noisy_wfpf, sr = sp.filtersignal(noisy, 188 | sr = sr, 189 | filter_type = 'wiener', 190 | apply_postfilter = True, 191 | filter_scale = filter_scale) 192 | 193 | ########################################################## 194 | ipd.Audio(noisy_wfpf,rate = sr) 195 | 196 | ########################################################## 197 | sp.plotsound(noisy_wfpf, sr = sr, feature_type = 'signal', 198 | title = 'Noisy Speech: Wiener Filter with Postfilter Scale {}'.format(filter_scale), 199 | subprocess=True) 200 | -------------------------------------------------------------------------------- /docs/source/examples/plot_implement_denoiser.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ================================= 4 | Implement a Denoising Autoencoder 5 | ================================= 6 | 7 | Implement denoising autoencoder to denoise a noisy speech signal. 8 | 9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_run`. 10 | """ 11 | 12 | 13 | ############################################################################################ 14 | # 15 | 16 | ##################################################################### 17 | # Let's import soundpy and other packages 18 | import soundpy as sp 19 | import numpy as np 20 | # for playing audio in this notebook: 21 | import IPython.display as ipd 22 | 23 | ##################################################################### 24 | # As well as the deep learning component of soundpy 25 | from soundpy import models as spdl 26 | 27 | ###################################################### 28 | # Prepare for Implementation: Data Organization 29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 30 | 31 | ########################################################## 32 | # Set path relevant for audio data for this example 33 | sp_dir = '../../../' 34 | 35 | ###################################################### 36 | # Set model pathway 37 | # ~~~~~~~~~~~~~~~~~ 38 | # Currently, this expects a model saved with weights, with a .h5 extension. 39 | # (See `model` below) 40 | 41 | ###################################################### 42 | # The soundpy repo offers a pre-trained denoiser, which we'll use. 43 | model = '{}audiodata/models/'.format(sp_dir)+\ 44 | 'denoiser/example_denoiser_stft.h5' 45 | # ensure is a pathlib.PosixPath object 46 | print(model) 47 | model = sp.utils.string2pathlib(model) 48 | model_dir = model.parent 49 | 50 | ######################################################### 51 | # What is in this folder? 52 | files = list(model_dir.glob('*.*')) 53 | for f in files: 54 | print(f.name) 55 | 56 | ###################################################### 57 | # Provide dictionary with feature extraction settings 58 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 59 | 60 | ######################################################### 61 | # If soundpy extracts features for you, a 'log_extraction_settings.csv' 62 | # file will be saved, which includes relevant feature settings for implementing 63 | # the model; see `soundpy.feats.save_features_datasets` 64 | feat_settings = sp.utils.load_dict( 65 | model_dir.joinpath('log_extraction_settings.csv')) 66 | for key, value in feat_settings.items(): 67 | print(key, ' --> ', value) 68 | # change objects that were string to original format 69 | import ast 70 | try: 71 | feat_settings[key] = ast.literal_eval(value) 72 | except ValueError: 73 | pass 74 | except SyntaxError: 75 | pass 76 | 77 | ######################################################### 78 | # For the purposes of plotting, let's use some of the settings defined: 79 | feature_type = feat_settings['feature_type'] 80 | sr = feat_settings['sr'] 81 | 82 | ###################################################### 83 | # Provide new audio for the denoiser to denoise! 84 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 85 | 86 | ######################################################### 87 | # We'll use sample speech from the soundpy repo: 88 | speech = sp.string2pathlib('{}audiodata/python.wav'.format(sp_dir)) 89 | s, sr = sp.loadsound(speech, sr=sr) 90 | 91 | ######################################################### 92 | # Let's add some white noise (10 SNR) 93 | s_n = sp.augment.add_white_noise(s, sr=sr, snr=10) 94 | 95 | ############################################################## 96 | # What does the noisy audio sound like? 97 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 98 | ipd.Audio(s_n,rate=sr) 99 | 100 | ############################################################## 101 | # What does the noisy audio look like? 102 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 103 | sp.plotsound(s_n, sr = sr, feature_type='signal', subprocess=True) 104 | 105 | ############################################################## 106 | # What does the clean audio sound like? 107 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 108 | ipd.Audio(s,rate=sr) 109 | 110 | ############################################################## 111 | # What does the clean audio look like? 112 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 113 | sp.plotsound(s, sr = sr, feature_type='signal', subprocess=True) 114 | 115 | ######################################################################### 116 | # Built-In Denoiser Functionality 117 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 118 | 119 | ############################################################## 120 | # We just need to feed the model path, the noisy sample path, and 121 | # the feature settings dictionary we looked at above. 122 | y, sr = spdl.denoiser_run(model, s_n, feat_settings) 123 | 124 | ########################################################## 125 | # How does the output sound? 126 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~ 127 | ipd.Audio(y,rate=sr) 128 | 129 | ########################################################## 130 | # How does is the output look? 131 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 132 | sp.plotsound(y, sr=sr, feature_type = feature_type, subprocess=True) 133 | 134 | ########################################################## 135 | # How do the features compare? 136 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 137 | 138 | ########################################################## 139 | # STFT features of the noisy input speech: 140 | sp.plotsound(s_n, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db', 141 | title = 'Noisy input: STFT features', subprocess=True) 142 | 143 | ########################################################## 144 | # STFT features of the output 145 | sp.plotsound(y, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db', 146 | title = 'Denoiser Output: STFT features', subprocess=True) 147 | 148 | ########################################################## 149 | # STFT features of the clean version of the audio: 150 | sp.plotsound(s, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db', 151 | title = 'Clean "target" audio: STFT features', subprocess=True) 152 | 153 | 154 | ########################################################## 155 | # It's not perfect but for a pretty simple implementation, the noise is gone 156 | # and you can hear the person speaking. Pretty cool! 157 | -------------------------------------------------------------------------------- /docs/source/examples/plot_signals_and_features.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ======================= 4 | Create and Plot Signals 5 | ======================= 6 | 7 | Create and plot signals / noise; combine them at a specific SNR. 8 | 9 | To see how soundpy implements this, see `soundpy.dsp.generate_sound`, 10 | `soundpy.dsp.generate_noise` and `soundpy.dsp.add_backgroundsound`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | 17 | ##################################################################### 18 | # Let's import soundpy 19 | import soundpy as sp 20 | 21 | ########################################################################### 22 | # Create a Signal 23 | # ^^^^^^^^^^^^^^^ 24 | 25 | ######################################################################## 26 | # First let's set what sample rate we want to use 27 | sr = 44100 28 | 29 | 30 | ######################################################################### 31 | # Let's create a signal of 10 Hz 32 | sig1_hz = 10 33 | sig1, sr = sp.generate_sound(freq=sig1_hz, amplitude = 0.4, sr=sr, dur_sec=1) 34 | sp.plotsound(sig1, sr=sr, feature_type = 'signal', 35 | title = 'Signal: {} Hz'.format(sig1_hz), subprocess=True) 36 | 37 | 38 | ######################################################################### 39 | # Let's create a signal of 20 Hz 40 | sig2_hz = 20 41 | sig2, sr = sp.generate_sound(freq=sig2_hz, amplitude= 0.4, sr=sr, dur_sec=1) 42 | sp.plotsound(sig2, sr=sr, feature_type = 'signal', 43 | title = 'Signal: {} Hz'.format(sig2_hz), subprocess=True) 44 | 45 | ########################################################################### 46 | # Combine Signals 47 | # ^^^^^^^^^^^^^^^ 48 | 49 | 50 | ######################################################################### 51 | # Add them together and see what they look like: 52 | sig3 = sig1 + sig2 53 | sp.plotsound(sig3, sr=sr, feature_type = 'signal', 54 | title='Mixed Signals: {} Hz + {} Hz'.format(sig1_hz, sig2_hz), 55 | subprocess=True) 56 | 57 | 58 | ########################################################################## 59 | # Generate Pseudo-Random Noise 60 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 61 | 62 | 63 | ######################################################################### 64 | # Create noise to add to the signal: 65 | noise = sp.generate_noise(len(sig3), amplitude=0.02, random_seed=40) 66 | sp.plotsound(noise, sr=sr, feature_type = 'signal', 67 | title='Random Noise', subprocess=True) 68 | 69 | ########################################################################### 70 | # Control SNR: Adding a Background Sound 71 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 72 | 73 | ######################################################################### 74 | # Add noise at signal-to-noise ratio of 40 75 | sig_noisy, snr = sp.dsp.add_backgroundsound( 76 | audio_main = sig3, 77 | audio_background = noise, 78 | sr = sr, 79 | snr = 40, 80 | clip_at_zero = False) 81 | 82 | # keep energy between 1 and -1 83 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 84 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 40 SNR', 85 | subprocess=True) 86 | 87 | ######################################################################### 88 | # Add noise at signal-to-noise ratio of 20 89 | sig_noisy, snr = sp.dsp.add_backgroundsound( 90 | audio_main = sig3, 91 | audio_background = noise, 92 | sr = sr, 93 | snr = 20) 94 | # keep energy between 1 and -1 95 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 96 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 20 SNR', 97 | subprocess=True) 98 | 99 | ######################################################################### 100 | # Add noise at signal-to-noise ratio of 10 101 | sig_noisy, snr = sp.dsp.add_backgroundsound( 102 | audio_main = sig3, 103 | audio_background = noise, 104 | sr = sr, 105 | snr = 10) 106 | # keep energy between 1 and -1 107 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 108 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 10 SNR', 109 | subprocess=True) 110 | 111 | ######################################################################### 112 | # Add noise at signal-to-noise ratio of 0 113 | sig_noisy, snr = sp.dsp.add_backgroundsound( 114 | audio_main = sig3, 115 | audio_background = noise, 116 | sr = sr, 117 | snr = 0) 118 | # keep energy between 1 and -1 119 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 120 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 0 SNR', 121 | subprocess=True) 122 | 123 | 124 | ######################################################################### 125 | # Add noise at signal-to-noise ratio of -10 126 | sig_noisy, snr = sp.dsp.add_backgroundsound( 127 | audio_main = sig3, 128 | audio_background = noise, 129 | sr = sr, 130 | snr = -10) 131 | # keep energy between 1 and -1 132 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1) 133 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: -10 SNR', 134 | subprocess=True) 135 | -------------------------------------------------------------------------------- /docs/source/examples/plot_train_classifier.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ============================ 4 | Train an Acoustic Classifier 5 | ============================ 6 | 7 | Train an acoustic classifier on speech or noise features. 8 | 9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_train`. 10 | """ 11 | 12 | ############################################################################################### 13 | # 14 | import os, sys 15 | import inspect 16 | currentdir = os.path.dirname(os.path.abspath( 17 | inspect.getfile(inspect.currentframe()))) 18 | parentdir = os.path.dirname(currentdir) 19 | parparentdir = os.path.dirname(parentdir) 20 | packagedir = os.path.dirname(parparentdir) 21 | sys.path.insert(0, packagedir) 22 | 23 | import matplotlib.pyplot as plt 24 | import IPython.display as ipd 25 | package_dir = '../../../' 26 | os.chdir(package_dir) 27 | sp_dir = package_dir 28 | 29 | 30 | ##################################################################### 31 | # Let's import soundpy for handling sound 32 | import soundpy as sp 33 | ##################################################################### 34 | # As well as the deep learning component of soundpy 35 | from soundpy import models as spdl 36 | 37 | 38 | ###################################################### 39 | # Prepare for Training: Data Organization 40 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 41 | 42 | ########################################################## 43 | # Set path relevant for audio data for this example 44 | 45 | ###################################################### 46 | # I will load previously extracted features (from the Speech Commands Dataset) 47 | # See `soundpy.feats.save_features_datasets` or `soundpy.builtin.envclassifier_feats` 48 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\ 49 | 'envclassifier/example_feats_fbank/' 50 | 51 | ######################################################### 52 | # What is in this folder? 53 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir) 54 | files = list(feature_extraction_dir.glob('*.*')) 55 | for f in files: 56 | print(f.name) 57 | 58 | ######################################################### 59 | # The .npy files contain the features themselves, in train, validation, and 60 | # test datasets: 61 | files = list(feature_extraction_dir.glob('*.npy')) 62 | for f in files: 63 | print(f.name) 64 | 65 | ######################################################### 66 | # The .csv files contain information about how the features were extracted 67 | files = list(feature_extraction_dir.glob('*.csv')) 68 | for f in files: 69 | print(f.name) 70 | 71 | ######################################################### 72 | # We'll have a look at which features were extracted and other settings: 73 | feat_settings = sp.utils.load_dict( 74 | feature_extraction_dir.joinpath('log_extraction_settings.csv')) 75 | for key, value in feat_settings.items(): 76 | print(key, ' --> ', value) 77 | 78 | ######################################################### 79 | # For more about these settings, see `soundpy.feats.save_features_datasets`. 80 | 81 | ######################################################### 82 | # We'll have a look at the audio files that were assigned 83 | # to the train, val, and test datasets. 84 | audio_datasets = sp.utils.load_dict( 85 | feature_extraction_dir.joinpath('dataset_audiofiles.csv')) 86 | count = 0 87 | for key, value in audio_datasets.items(): 88 | print(key, ' --> ', value) 89 | count += 1 90 | if count > 5: 91 | break 92 | 93 | ############################################################# 94 | # Built-In Functionality: soundpy does everything for you 95 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 96 | # For more about this function, see `soundpy.models.builtin.envclassifier_train`. 97 | 98 | ############################################################# 99 | model_dir, history = spdl.envclassifier_train( 100 | feature_extraction_dir = feature_extraction_dir, 101 | epochs = 10, 102 | patience = 5) 103 | 104 | ############################################################# 105 | # Where the model and logs are located: 106 | model_dir 107 | 108 | ############################################################# 109 | # Let's plot how the model performed (on this mini dataset) 110 | import matplotlib.pyplot as plt 111 | plt.clf() 112 | plt.plot(history.history['accuracy']) 113 | plt.plot(history.history['val_accuracy']) 114 | plt.title('model accuracy') 115 | plt.ylabel('accuracy') 116 | plt.xlabel('epoch') 117 | plt.legend(['train', 'val'], loc='upper right') 118 | plt.savefig('accuracy.png') 119 | -------------------------------------------------------------------------------- /docs/source/examples/plot_train_denoiser.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ============================= 4 | Train a Denoising Autoencoder 5 | ============================= 6 | 7 | Train a denoising autoencoder with clean and noisy acoustic features. 8 | 9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_train`, 10 | `soundpy.builtin.denoiser_feats` and `soundpy.builtin.create_denoise_data`. 11 | """ 12 | 13 | 14 | ############################################################################################### 15 | # 16 | import os, sys 17 | import inspect 18 | currentdir = os.path.dirname(os.path.abspath( 19 | inspect.getfile(inspect.currentframe()))) 20 | parentdir = os.path.dirname(currentdir) 21 | parparentdir = os.path.dirname(parentdir) 22 | packagedir = os.path.dirname(parparentdir) 23 | sys.path.insert(0, packagedir) 24 | 25 | import matplotlib.pyplot as plt 26 | import IPython.display as ipd 27 | package_dir = '../../../' 28 | os.chdir(package_dir) 29 | sp_dir = package_dir 30 | 31 | 32 | ##################################################################### 33 | # Let's import soundpy for handling sound 34 | import soundpy as sp 35 | ##################################################################### 36 | # As well as the deep learning component of soundpy 37 | from soundpy import models as spdl 38 | 39 | 40 | ###################################################### 41 | # Prepare for Training: Data Organization 42 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 43 | 44 | ########################################################## 45 | # Designate path relevant for accessing audiodata 46 | 47 | 48 | ###################################################### 49 | # I will load previously extracted features (sample data), see `soundpy.feats.save_features_datasets` or `soundpy.builtin.denoiser_feats` 50 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\ 51 | 'denoiser/example_feats_fbank/' 52 | 53 | ######################################################### 54 | # What is in this folder? 55 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir) 56 | files = list(feature_extraction_dir.glob('*.*')) 57 | for f in files: 58 | print(f.name) 59 | 60 | ######################################################### 61 | # The .npy files contain the features themselves, in train, validation, and 62 | # test datasets: 63 | files = list(feature_extraction_dir.glob('*.npy')) 64 | for f in files: 65 | print(f.name) 66 | 67 | ######################################################### 68 | # The .csv files contain information about how the features were extracted 69 | files = list(feature_extraction_dir.glob('*.csv')) 70 | for f in files: 71 | print(f.name) 72 | 73 | ######################################################### 74 | # We'll have a look at which features were extracted and other settings: 75 | feat_settings = sp.utils.load_dict( 76 | feature_extraction_dir.joinpath('log_extraction_settings.csv')) 77 | for key, value in feat_settings.items(): 78 | print(key, ' --> ', value) 79 | 80 | ######################################################### 81 | # For more about these settings, see `soundpy.feats.save_features_datasets`. 82 | 83 | ######################################################### 84 | # We'll have a look at the audio files that were assigned 85 | # to the train, val, and test datasets. 86 | audio_datasets = sp.utils.load_dict( 87 | feature_extraction_dir.joinpath('audiofiles_datasets_clean.csv')) 88 | count = 0 89 | for key, value in audio_datasets.items(): 90 | print(key, ' --> ', value) 91 | count += 1 92 | if count > 5: 93 | break 94 | 95 | ############################################################# 96 | # Built-In Functionality: soundpy does everything for you 97 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 98 | # For more about this, see `soundpy.builtin.denoiser_train`. 99 | 100 | ############################################################# 101 | model_dir, history = spdl.denoiser_train( 102 | feature_extraction_dir = feature_extraction_dir, 103 | epochs = 10) 104 | 105 | ######################################################### 106 | 107 | 108 | ############################################################# 109 | # Where the model and logs are located: 110 | model_dir 111 | 112 | 113 | ############################################################# 114 | # Let's plot how the model performed (on this mini dataset) 115 | import matplotlib.pyplot as plt 116 | plt.plot(history.history['loss']) 117 | plt.plot(history.history['val_loss']) 118 | plt.title('model loss') 119 | plt.ylabel('loss') 120 | plt.xlabel('epoch') 121 | plt.legend(['train', 'val'], loc='upper right') 122 | plt.savefig('loss.png') 123 | -------------------------------------------------------------------------------- /docs/source/exceptions.rst: -------------------------------------------------------------------------------- 1 | 2 | Customized Errors 3 | ----------------- 4 | 5 | .. automodule:: soundpy.exceptions 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/feats.rst: -------------------------------------------------------------------------------- 1 | 2 | Extract and manipulate audio features 3 | ------------------------------------- 4 | 5 | .. automodule:: soundpy.feats 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/files.rst: -------------------------------------------------------------------------------- 1 | 2 | Working with audio files 3 | ------------------------ 4 | 5 | .. automodule:: soundpy.files 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/filters.rst: -------------------------------------------------------------------------------- 1 | 2 | Filters: Wiener and Band Spectral Subtraction 3 | --------------------------------------------- 4 | 5 | .. automodule:: soundpy.filters 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | .. autoclass:: soundpy.filters.FilterSettings 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | .. automethod:: __init__ 16 | 17 | .. autoclass:: soundpy.filters.Filter 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | .. automethod:: __init__ 23 | 24 | 25 | .. autoclass:: soundpy.filters.WienerFilter 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | .. automethod:: __init__ 31 | 32 | 33 | .. autoclass:: soundpy.filters.BandSubtraction 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | .. automethod:: __init__ 39 | 40 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. SoundPy documentation master file, created by 2 | sphinx-quickstart on Mon Jun 15 11:57:18 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | SoundPy v0.1.0a3 7 | ================ 8 | 9 | Welcome to the docs! 10 | -------------------- 11 | 12 | 13 | SoundPy is a research based Python package_ for exploring and experimenting with sound and deep learning. NOTE: SoundPy is in alpha stage of development; please forgive any bugs that pop up, and also feel free/encouraged to open an issue_. 14 | 15 | Those who might find this useful: 16 | 17 | * speech and sound enthusiasts 18 | * digital signal processing / mathematics / physics / acoustics enthusiasts 19 | * deep learning enthusiasts 20 | * researchers 21 | * linguists 22 | * psycholinguists 23 | 24 | The main goal of SoundPy is to provide the code and functionality with more context via visualization, research, and mathematics. Most of the resources used to build the functionality stems from publicly available research and datasets. 25 | 26 | As it covers quite a large range, from audio file conversion to implementation of trained neural networks, the purpose of SoundPy is not to be the perfect implementation of all functions (although that is also a goal :P ), but rather a peak into how they *can* be implemented, hopefully offering people a foundation for trying out different ways of implementation (feature extraction, building neural networks, etc.). 27 | 28 | This project is still in the beginning stages and has a lot of room for growth, especially with contributors having a background / knowlege in data science, computer science, machine and deep learning, physics, acoustics, or dsp. Contributors from other backgrounds are also welcome! If you'd like SoundPy to do something it doesn't, try making it or create an issue. 29 | 30 | .. _PyPI: https://pypi.org/project/soundpy/ 31 | 32 | .. _package: https://github.com/a-n-rose/Python-Sound-Tool/tree/development 33 | 34 | .. _issue: https://github.com/a-n-rose/Python-Sound-Tool/issues 35 | 36 | .. toctree:: 37 | :maxdepth: 2 38 | 39 | example_cases.rst 40 | readme.rst 41 | 42 | 43 | .. toctree:: 44 | :maxdepth: 1 45 | 46 | changelog.rst 47 | 48 | * :ref:`genindex` 49 | * :ref:`modindex` 50 | * :ref:`search` 51 | 52 | :Author: 53 | Aislyn Rose 54 | 55 | rose.aislyn.noelle@gmail.com 56 | 57 | webpage_ 58 | 59 | github_ 60 | 61 | .. _webpage: https://a-n-rose.github.io/ 62 | 63 | .. _github : https://github.com/a-n-rose 64 | -------------------------------------------------------------------------------- /docs/source/model_dataprep.rst: -------------------------------------------------------------------------------- 1 | 2 | Feeding large datasets to models 3 | -------------------------------- 4 | 5 | .. autoclass:: soundpy.models.dataprep.Generator 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | .. automethod:: __init__ 11 | 12 | 13 | .. automodule:: soundpy.models.dataprep 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | -------------------------------------------------------------------------------- /docs/source/modelsetup.rst: -------------------------------------------------------------------------------- 1 | 2 | Additional model setup (e.g. Early Stopping) 3 | -------------------------------------------- 4 | 5 | .. automodule:: soundpy.models.modelsetup 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | ============================== 2 | SoundPy Functionality v0.1.0a3 3 | ============================== 4 | 5 | .. include:: builtin_sp.rst 6 | 7 | .. include:: builtin_spdl.rst 8 | 9 | .. include:: augment.rst 10 | 11 | .. include:: files.rst 12 | 13 | .. include:: datasets.rst 14 | 15 | .. include:: dsp.rst 16 | 17 | .. include:: filters.rst 18 | 19 | .. include:: feats.rst 20 | 21 | .. include:: template_models.rst 22 | 23 | .. include:: modelsetup.rst 24 | 25 | .. include:: model_dataprep.rst 26 | 27 | .. include:: utils.rst 28 | 29 | .. include:: exceptions.rst 30 | -------------------------------------------------------------------------------- /docs/source/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: modules.rst 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/source/template_models.rst: -------------------------------------------------------------------------------- 1 | Template deep neural networks 2 | ----------------------------- 3 | 4 | .. automodule:: soundpy.models.template_models 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/utils.rst: -------------------------------------------------------------------------------- 1 | 2 | Other useful non-specific functionality 3 | --------------------------------------- 4 | 5 | .. automodule:: soundpy.utils 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/versions.rst: -------------------------------------------------------------------------------- 1 | ****************************************** 2 | SoundPy Versions Available as PyPI Package 3 | ****************************************** 4 | 5 | .. toctree:: 6 | :maxdepth: 1 7 | 8 | 0.1.0a2/index.rst 9 | 10 | 0.1.0a3/index.rst 11 | 12 | -------------------------------------------------------------------------------- /new_version_updates.md: -------------------------------------------------------------------------------- 1 | # Updates of v0.1.0a3 release: 2 | 3 | ## Updates 4 | - don't use librosa for feature extraction anymore. But compatible with previous versions. 5 | - parameter: frames_per_sample and context_window, with depreciation warning 6 | Just remove these parameters from feature extraction and limit to generators. Otherwise too messy and complex 7 | - soundpy.models.builtin.implement_denoiser() raise warning if cleaned features cannot be 8 | converted to raw audio samples. 9 | - BUG FIX: soundpy.feats.plot can now be used from within generator using backend Agg and 10 | then switch to Tkinker backend using use_tkinker parameter for normal use outside of training. 11 | - require additional tensors to be added to the desired shape and then supplied to generator to make shape process more explicit in generator. 12 | 13 | changed parameter (Generator) normalized to normalize (opposite bool); removed add_tensor_last parameter, adjusted grayscale2color sections: can be applied to 2D data; set sr default to 22050 14 | 15 | - Got the augment cnn builtin functionality to run with pre-trained features.. needs cleaning 16 | - got plotsound, plot vad, and plot dom freq, to work with stereo sound 17 | 18 | Removing from envclassifier_extract_train: 19 | dataset_dict = None, 20 | num_labels = None, 21 | 22 | 23 | ## Updates of v0.1.0a2 release: 24 | 25 | ### Updated Dependencies 26 | - Updated dependencies to newest versions still compatible with Tensorflow 2.1.0 27 | - Note: bug in training with generators occurs with Tensorflow 2.2.0+. Models trained via generators fail to learn. Therefore, Tensorflow is limited to version 2.1.0 until that bug is fixed. 28 | 29 | ### GPU option added 30 | - provide instructions for running Docker image for GPU 31 | 32 | ### soundpy.dsp.vad 33 | - add `use_beg_ms` parameter: improved VAD recognition of silences post speech. 34 | - raise warning for sample rates lower than 44100 Hz. VAD seems to fail at lower sample rates. 35 | 36 | ### soundpy.feats.get_vad_samples and soundpy.feats.get_vad_stft 37 | - moved from dsp module to the feats module 38 | - add `extend_window_ms` paremeter: can extend VAD window if desired. Useful in higher SNR environments. 39 | - raise warning for sample rates lower than 44100 Hz. VAD seems to fail at lower sample rates. 40 | 41 | ### added soundpy.feats.get_samples_clipped and soundpy.feats.get_stft_clipped 42 | - another option for VAD 43 | - clips beginning and ending of audio data where high energy sound starts and ends. 44 | 45 | ### soundpy.models.dataprep.GeneratorFeatExtraction 46 | - can extract and augment features from audio files as each audio file fed to model. 47 | - example can be viewed: soundpy.models.builtin.envclassifier_extract_train 48 | - note: still very experimental 49 | 50 | ### soundpy.dsp.add_backgroundsound 51 | - improvements in the smoothness of the added signal. 52 | - soundpy.dsp.clip_at_zero 53 | - improved soundpy.dsp.vad and soundpy.feats.get_vad_stft 54 | 55 | ### soundpy.feats.normalize 56 | - can use it: soundpy.normalize (don't need to remember dsp or feats) 57 | 58 | ### soundpy.dsp.remove_dc_bias 59 | - implemented in soundpy.files.loadsound() and soundpy.files.savesound() 60 | - vastly improves the ability to work with and combine signals. 61 | 62 | ### soundpy.dsp.clip_at_zero 63 | - clips beginning and ending audio at zero crossings (at negative to positive zero crossings) 64 | - useful when concatenating signals 65 | - useful for removing clicks at beginning or ending of audio signals 66 | 67 | ### soundpy.dsp.apply_sample_length 68 | - can now mirror the sound as a form of sound extention with parameter `mirror_sound`. 69 | 70 | ### Removed soundpy_online (and therefore mybinder as well) 71 | - for the time being, this is too much work to keep up. Eventually plan on bringing this back in a more maintainable manner. 72 | 73 | ### Added stereo sound functionality to the following functions: 74 | - soundpy.dsp.add_backgroundsound 75 | - soundpy.dsp.clip_at_zero 76 | - soundpy.dsp.calc_fft 77 | - soundpy.feats.get_stft 78 | - soundpy.feats.get_vad_stft 79 | 80 | ### New functions related to stereo sound 81 | - soundpy.dsp.ismono for checking if a signal is mono or stereo 82 | - soundpy.dsp.average_channels for averaging amplitude in all channels (e.g. identifying when energetic sounds start / end: want to consider all channels) 83 | - soundpy.dsp.add_channels for adding additional channels if needed (e.g. for applying a 'hann' or 'hamming' window to stereo sound) 84 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow>=2.1.0 2 | numpy 3 | scipy 4 | scikit-learn 5 | librosa 6 | python-speech-features 7 | matplotlib 8 | soundfile 9 | numba 10 | scikit-image>=0.17.2 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | from setuptools import setup, find_packages 3 | 4 | # The directory containing this file 5 | HERE = pathlib.Path(__file__).parent 6 | 7 | # The text of the README file 8 | README = (HERE / "README.md").read_text() 9 | 10 | dependencies='' 11 | with open("requirements.txt","r") as f: 12 | dependencies = f.read().splitlines() 13 | 14 | # This call to setup() does all the work 15 | setup( 16 | name="soundpy", 17 | version="0.1.0a3", 18 | description="A research-based framework for exploring sound as well as machine learning in the context of sound.", 19 | long_description=README, 20 | long_description_content_type="text/markdown", 21 | url="https://github.com/a-n-rose/Python-Sound-Tool", 22 | author="Aislyn Rose", 23 | author_email="rose.aislyn.noelle@gmail.com", 24 | license="AGPL-3.0", 25 | classifiers=[ 26 | "License :: OSI Approved :: GNU Affero General Public License v3", 27 | "Programming Language :: Python :: 3", 28 | "Programming Language :: Python :: 3.6", 29 | "Programming Language :: Python :: 3.8", 30 | ], 31 | packages=find_packages(exclude=("tests","docs", "jupyter_notebooks")), 32 | include_package_data=True, 33 | install_requires=dependencies, 34 | python_requires=">=3.6.9", 35 | ) 36 | -------------------------------------------------------------------------------- /soundpy/__init__.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | from . import utils 3 | from . import feats 4 | from . import files 5 | from . import datasets 6 | from . import filters 7 | from . import dsp 8 | from . import builtin 9 | from . import exceptions as errors 10 | from . import augment 11 | from .utils import check_dir, string2pathlib 12 | from .files import loadsound, savesound 13 | from .feats import plotsound, normalize 14 | from .filters import WienerFilter, BandSubtraction 15 | from .dsp import generate_sound, generate_noise 16 | from .builtin import envclassifier_feats, denoiser_feats, filtersignal 17 | 18 | __all__=['utils', 'feats', 'filters', 'WienerFilter', 'BandSubtraction', 19 | 'filtersignal', 'dsp','errors', 'plotsound', 'loadsound', 'savesound', 20 | 'datasets', 'envclassifier_feats', 'denoiser_feats', 'generate_sound', 'playsound', 21 | 'generate_noise', 'builtin', 'augment', 'check_dir', 'string2pathlib', 22 | 'normalize'] 23 | -------------------------------------------------------------------------------- /soundpy/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/soundpy/__init__.pyc -------------------------------------------------------------------------------- /soundpy/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`soundpy.exceptions` module includes customized errors. 3 | """ 4 | 5 | def notsufficientdata_error(numtrain, numval, numtest, expected_numtrain): 6 | raise ValueError('Not enough training data:'+\ 7 | '\nNumber train samples: {} '.format(numtrain)+\ 8 | '(Minumum expected: {})'.format(expected_numtrain)+\ 9 | '\nNumber val samples: {}'.format(numval)+\ 10 | '\nNumber test samples: {}'.format(numtest) +\ 11 | '\n\nPlease lower `perc_train` or collect more audio data.') 12 | 13 | def numfeatures_incompatible_templatemodel(): 14 | raise ValueError('ERROR: Number of features is incompatible with the template model. '+\ 15 | 'Try a higher number or rely on the defaults. Apologies for this inconvenience.') 16 | -------------------------------------------------------------------------------- /soundpy/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataprep import Generator, GeneratorFeatExtraction, make_gen_callable 2 | from .template_models import cnn_classifier, autoencoder_denoise, resnet50_classifier, \ 3 | cnnlstm_classifier 4 | from .modelsetup import setup_callbacks, setup_layers 5 | from . import plot 6 | from . import builtin 7 | from .builtin import denoiser_train, envclassifier_train, denoiser_run, cnnlstm_train, \ 8 | resnet50_train, envclassifier_extract_train, cnnlstm_extract_train, envclassifier_run 9 | 10 | __all__ = ['Generator', 'GeneratorFeatExtraction', 11 | 'cnn_classifier', 'autoencoder_denoise', 'resnet50_classifier', 12 | 'setup_callbacks', 'plot', 'cnnlstm_classifier', 'builtin', 'denoiser_train', 13 | 'envclassifier_train', 'denoiser_run', 'cnnlstm_train', 'resnet50_train', 14 | 'envclassifier_extract_train','make_gen_callable', 'setup_layers', 15 | 'cnnlstm_extract_train', 'envclassifier_run'] 16 | -------------------------------------------------------------------------------- /soundpy/models/plot.py: -------------------------------------------------------------------------------- 1 | import tensorflow 2 | from tensorflow.keras.models import Model 3 | from tensorflow.keras.models import load_model 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | import os, sys 8 | import inspect 9 | currentdir = os.path.dirname(os.path.abspath( 10 | inspect.getfile(inspect.currentframe()))) 11 | packagedir = os.path.dirname(currentdir) 12 | sys.path.insert(0, packagedir) 13 | import soundpy as pyst 14 | 15 | 16 | def featuremaps(features, model, image_dir='./feature_maps/'): 17 | '''Saves the feature maps of each convolutional layer as .png file. 18 | 19 | References 20 | ---------- 21 | Brownlee, Jason (2019, May, 6). How to Visualize Filters and Feature 22 | Maps in Convolutional Neural Networks. Machine Learning Mastery. 23 | https://machinelearningmastery.com/how-to-visualize-filters-and-feature-maps-in-convolutional-neural-networks/ 24 | ''' 25 | conv_idx = [] 26 | for i in range(len(model.layers)): 27 | layer = model.layers[i] 28 | if 'conv' in layer.name: 29 | conv_idx.append(i) 30 | for idx in conv_idx: 31 | model_featmaps = Model(inputs = model.inputs, 32 | outputs = model.layers[idx].output) 33 | featuremaps = model_featmaps.predict(features) 34 | for i in range(featuremaps.shape[-1]): 35 | plt.clf() 36 | plt.imshow(featuremaps[0,:,:,i], cmap='gray') 37 | image_dir = sp.utils.check_dir(image_dir, make=True) 38 | image_path = image_dir.joinpath('layer_{}'.format(idx), 39 | 'featmap_{}.png'.format(i)) 40 | image_par = sp.utils.check_dir(image_path.parent, make=True) 41 | plt.savefig(image_path) 42 | -------------------------------------------------------------------------------- /soundpy/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/soundpy/utils.pyc -------------------------------------------------------------------------------- /start_jup_env.sh: -------------------------------------------------------------------------------- 1 | docker run -it --rm \ 2 | --gpus all \ 3 | --privileged=true \ 4 | -v "$PWD":"/root/soundpy/" \ 5 | -p 8888:8888 aju 6 | #-v "/audiodir/data":"/root/soundpy/data" \ 7 | -------------------------------------------------------------------------------- /tests/inspect_functions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | From NLTK decorators: https://github.com/nltk/nltk/blob/develop/nltk/decorators.py 3 | 4 | """ 5 | Decorator module by Michele Simionato 6 | Copyright Michele Simionato, distributed under the terms of the BSD License (see below). 7 | http://www.phyast.pitt.edu/~micheles/python/documentation.html 8 | Included in NLTK for its support of a nice memoization decorator. 9 | """ 10 | ''' 11 | 12 | 13 | import inspect 14 | 15 | def __legacysignature(signature): 16 | """ 17 | For retrocompatibility reasons, we don't use a standard Signature. 18 | Instead, we use the string generated by this method. 19 | Basically, from a Signature we create a string and remove the default values. 20 | """ 21 | listsignature = str(signature)[1:-1].split(",") 22 | for counter, param in enumerate(listsignature): 23 | if param.count("=") > 0: 24 | listsignature[counter] = param[0:param.index("=")].strip() 25 | else: 26 | listsignature[counter] = param.strip() 27 | return ", ".join(listsignature) 28 | 29 | def getinfo(func): 30 | """ 31 | Returns an info dictionary containing: 32 | - name (the name of the function : str) 33 | - argnames (the names of the arguments : list) 34 | - defaults (the values of the default arguments : tuple) 35 | - signature (the signature : str) 36 | - fullsignature (the full signature : Signature) 37 | - doc (the docstring : str) 38 | - module (the module name : str) 39 | - dict (the function __dict__ : str) 40 | >>> def f(self, x=1, y=2, *args, **kw): pass 41 | >>> info = getinfo(f) 42 | >>> info["name"] 43 | 'f' 44 | >>> info["argnames"] 45 | ['self', 'x', 'y', 'args', 'kw'] 46 | >>> info["defaults"] 47 | (1, 2) 48 | >>> info["signature"] 49 | 'self, x, y, *args, **kw' 50 | >>> info["fullsignature"] 51 | 52 | """ 53 | assert inspect.ismethod(func) or inspect.isfunction(func) 54 | argspec = inspect.getfullargspec(func) 55 | regargs, varargs, varkwargs = argspec[:3] 56 | argnames = list(regargs) 57 | if varargs: 58 | argnames.append(varargs) 59 | if varkwargs: 60 | argnames.append(varkwargs) 61 | fullsignature = inspect.signature(func) 62 | # Convert Signature to str 63 | signature = __legacysignature(fullsignature) 64 | 65 | 66 | # pypy compatibility 67 | if hasattr(func, "__closure__"): 68 | _closure = func.__closure__ 69 | _globals = func.__globals__ 70 | else: 71 | _closure = func.func_closure 72 | _globals = func.func_globals 73 | 74 | return dict( 75 | name=func.__name__, 76 | argnames=argnames, 77 | signature=signature, 78 | fullsignature=fullsignature, 79 | defaults=func.__defaults__, 80 | doc=func.__doc__, 81 | module=func.__module__, 82 | dict=func.__dict__, 83 | globals=_globals, 84 | closure=_closure, 85 | ) 86 | 87 | ########################## LEGALESE ############################### 88 | 89 | ## Redistributions of source code must retain the above copyright 90 | ## notice, this list of conditions and the following disclaimer. 91 | ## Redistributions in bytecode form must reproduce the above copyright 92 | ## notice, this list of conditions and the following disclaimer in 93 | ## the documentation and/or other materials provided with the 94 | ## distribution. 95 | 96 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 97 | ## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 98 | ## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 99 | ## A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 100 | ## HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 101 | ## INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 102 | ## BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 103 | ## OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 104 | ## ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 105 | ## TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 106 | ## USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 107 | ## DAMAGE. 108 | -------------------------------------------------------------------------------- /tests/utils_test.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import inspect 3 | currentdir = os.path.dirname(os.path.abspath( 4 | inspect.getfile(inspect.currentframe()))) 5 | parentdir = os.path.dirname(currentdir) 6 | sys.path.insert(0, parentdir) 7 | 8 | import numpy as np 9 | import pytest 10 | import librosa 11 | import pathlib 12 | import soundpy as sp 13 | 14 | audio_dir = 'test_audio/' 15 | test_audiofile = '{}audio2channels.wav'.format(audio_dir) 16 | 17 | 18 | 19 | def test_path_or_samples_str(): 20 | item_type = sp.utils.path_or_samples(test_audiofile) 21 | assert item_type == 'path' 22 | 23 | def test_path_or_samples_pathlib(): 24 | item_type = sp.utils.path_or_samples(pathlib.Path(test_audiofile)) 25 | assert item_type == 'path' 26 | 27 | def test_path_or_samples_tuple_librosa(): 28 | item = librosa.load(test_audiofile) 29 | item_type = sp.utils.path_or_samples(item) 30 | assert item_type == 'samples' 31 | 32 | def test_path_or_samples_tuple_not_real_samples(): 33 | item = (np.ndarray([1,2,3]), 4) 34 | item_type = sp.utils.path_or_samples(item) 35 | assert item_type == 'samples' 36 | 37 | def test_path_or_samples_str_not_real_path(): 38 | print('IF TEST FAILES: For now, function does not test for path validity.') 39 | with pytest.raises(ValueError): 40 | item_type = sp.utils.path_or_samples('blah') 41 | 42 | def test_path_or_samples_pathlib_not_real_path(): 43 | print('IF TEST FAILES: For now, function does not test for path validity.') 44 | with pytest.raises(ValueError): 45 | item_type = sp.utils.path_or_samples(pathlib.Path('blah')) 46 | 47 | def test_match_dtype_float2int(): 48 | array_original = np.array([1,2,3,4]) 49 | array_to_change = np.array([1.,2.,3.,4.,5.]) 50 | array_adjusted = sp.utils.match_dtype(array_to_change, array_original) 51 | assert array_original.dtype == array_adjusted.dtype 52 | assert len(array_to_change) == len(array_adjusted) 53 | assert np.array_equal(array_to_change, array_adjusted) 54 | assert array_to_change.dtype != array_original.dtype 55 | 56 | def test_match_dtype_int2float(): 57 | array_original = np.array([1.,2.,3.,4.]) 58 | array_to_change = np.array([1,2,3,4,5]) 59 | array_adjusted = sp.utils.match_dtype(array_to_change, array_original) 60 | assert array_original.dtype == array_adjusted.dtype 61 | assert len(array_to_change) == len(array_adjusted) 62 | assert np.array_equal(array_to_change, array_adjusted) 63 | assert array_to_change.dtype != array_original.dtype 64 | 65 | def test_shape_samps_channels_too_many_dimensions(): 66 | input_data = np.array([1,2,3,4,5,6,7,8,9,10,11,12]).reshape(2,3,2) 67 | with pytest.raises(ValueError): 68 | output_data = sp.dsp.shape_samps_channels(input_data) 69 | 70 | def test_check_dir_default_create(): 71 | test_dir = './testtesttest/' 72 | test_dir = sp.utils.check_dir(test_dir) 73 | assert isinstance(test_dir, pathlib.PosixPath) 74 | assert os.path.exists(test_dir) 75 | os.rmdir(test_dir) 76 | 77 | def test_check_dir_check_exists(): 78 | test_dir = './testtesttest/' 79 | test_dir = sp.utils.check_dir(test_dir, make=True) 80 | test_dir = sp.utils.check_dir(test_dir, make=False) 81 | assert isinstance(test_dir, pathlib.PosixPath) 82 | assert os.path.exists(test_dir) 83 | os.rmdir(test_dir) 84 | 85 | def test_check_dir_check_exists_raiseerror(): 86 | test_dir = './testtesttest/' 87 | with pytest.raises(FileNotFoundError): 88 | test_dir = sp.utils.check_dir(test_dir, make=False) 89 | 90 | def test_check_dir_check_exists_notwriteinto_raiseerror(): 91 | test_dir = './testtesttest/' 92 | test_dir = sp.utils.check_dir(test_dir, make=True) 93 | with pytest.raises(FileExistsError): 94 | test_dir = sp.utils.check_dir(test_dir, make=False, append=False) 95 | os.rmdir(test_dir) 96 | 97 | def test_check_dir_pathwithextension_raiseerror(): 98 | test_dir = './testtesttest.py/' 99 | with pytest.raises(TypeError): 100 | test_dir = sp.utils.check_dir(test_dir, make=False) 101 | 102 | def test_string2list(): 103 | audiofiles = sp.files.collect_audiofiles(audio_dir,wav_only=False, 104 | recursive=False) 105 | audiofiles_string = str(audiofiles) 106 | audiofiles_checked = sp.utils.restore_dictvalue(audiofiles_string) 107 | assert audiofiles == audiofiles_checked 108 | 109 | def test_string2list_loaddict(): 110 | audiofiles = sp.files.collect_audiofiles(audio_dir,wav_only=False, 111 | recursive=False) 112 | d = dict([(0,audiofiles)]) 113 | test_dict_path = 'testest.csv' 114 | if os.path.exists(test_dict_path): 115 | os.remove(test_dict_path) 116 | d_path = sp.utils.save_dict( 117 | dict2save = d, 118 | filename = test_dict_path) 119 | d_loaded = sp.utils.load_dict(d_path) 120 | for i, key in enumerate(d_loaded): 121 | key = key 122 | audiofiles_string = d_loaded[key] 123 | audiofiles_checked = sp.utils.restore_dictvalue(audiofiles_string) 124 | assert audiofiles == audiofiles_checked 125 | os.remove(test_dict_path) 126 | 127 | def test_restore_dictvalue_list_of_tuples(): 128 | pass 129 | 130 | def test_restore_dictvalue_regular_string(): 131 | expected = 'hann' 132 | got = sp.utils.restore_dictvalue(expected) 133 | assert expected == got 134 | 135 | def test_restore_dictvalue_None(): 136 | expected = None 137 | string_val = str(expected) 138 | got = sp.utils.restore_dictvalue(string_val) 139 | assert expected == got 140 | 141 | def test_restore_dictvalue_True(): 142 | expected = True 143 | string_val = str(expected) 144 | got = sp.utils.restore_dictvalue(string_val) 145 | assert expected == got 146 | 147 | def test_restore_dictvalue_False(): 148 | expected = False 149 | string_val = str(expected) 150 | got = sp.utils.restore_dictvalue(string_val) 151 | assert expected == got 152 | 153 | def test_restore_dictvalue_int(): 154 | expected = 1 155 | string_val = str(expected) 156 | got = sp.utils.restore_dictvalue(string_val) 157 | assert expected == got 158 | 159 | def test_restore_dictvalue_float(): 160 | expected = 1.0 161 | string_val = str(expected) 162 | got = sp.utils.restore_dictvalue(string_val) 163 | assert expected == got 164 | 165 | def test_restore_dictvalue_tuple(): 166 | expected = (3,4) 167 | string_val = str(expected) 168 | got = sp.utils.restore_dictvalue(string_val) 169 | assert expected == got 170 | 171 | def test_restore_dictvalue_list_of_pathwaystrings(): 172 | expected = ['audio1.wav','audio2.wav','audio3.wav'] 173 | string_list = str(expected) 174 | got = sp.utils.restore_dictvalue(string_list) 175 | assert expected == got 176 | 177 | def test_restore_dictvalue_list_of_pathlib_ojbect_strings(): 178 | expected = [pathlib.Path('audio1.wav'),pathlib.Path('audio2.wav'),pathlib.Path('audio3.wav')] 179 | string_list = str(expected) 180 | got = sp.utils.restore_dictvalue(string_list) 181 | assert expected == got 182 | 183 | def test_restore_dictvalue_list_of_pathwaystrings_nested(): 184 | expected = [['audio1.wav','audio2.wav'],['audio3.wav']] 185 | string_list = str(expected) 186 | got = sp.utils.restore_dictvalue(string_list) 187 | assert expected == got 188 | 189 | def test_restore_dictvalue_list_of_pathlib_ojbect_strings_nested(): 190 | expected = [[pathlib.Path('audio1.wav'),pathlib.Path('audio2.wav')],[pathlib.Path('audio3.wav')]] 191 | string_list = str(expected) 192 | with pytest.raises(ValueError): 193 | got = sp.utils.restore_dictvalue(string_list) 194 | 195 | def test_restore_dictvalue_tuple_labeledpaths(): 196 | expected = [(1, 'audio1.wav'),(2, 'audio2.wav'),(3, 'audio3.wav')] 197 | string_list = str(expected) 198 | got = sp.utils.restore_dictvalue(string_list) 199 | assert expected == got 200 | 201 | def test_restore_dictvalue_tuple_labeled_pathlibojbects(): 202 | expected = [(1, pathlib.Path('audio1.wav')),(2, pathlib.Path('audio2.wav')),(3, pathlib.Path('audio3.wav'))] 203 | string_list = str(expected) 204 | got = sp.utils.restore_dictvalue(string_list) 205 | assert expected == got 206 | -------------------------------------------------------------------------------- /tests_requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | --------------------------------------------------------------------------------