├── .gitignore
├── Copying.docx
├── Dockerfile
├── GNU_AGPL_full.docx
├── LICENSE.md
├── README.md
├── THIRD-PARTY-NOTICES.docx
├── TODO.md
├── audiodata
    ├── background_samples
    │   ├── README.md
    │   ├── cafe.wav
    │   ├── fridge.wav
    │   └── traffic.wav
    ├── car_horn.wav
    ├── models
    │   └── denoiser
    │   │   ├── example_denoiser_stft.h5
    │   │   ├── log.csv
    │   │   └── log_extraction_settings.csv
    └── python.wav
├── build_aju_image.sh
├── doc_requirements.txt
├── docs
    └── source
    │   ├── 0.1.0a2
    │       ├── augment.rst
    │       ├── builtin_sp.rst
    │       ├── builtin_spdl.rst
    │       ├── changelog.rst
    │       ├── conf.py
    │       ├── datasets.rst
    │       ├── dsp.rst
    │       ├── example_cases.rst
    │       ├── examples
    │       │   ├── README.txt
    │       │   ├── plot_SNR_add_noise_to_datasets.py
    │       │   ├── plot_augment_sound.py
    │       │   ├── plot_dataset_info_formatting.py
    │       │   ├── plot_featureprep_denoiser.py
    │       │   ├── plot_featureprep_envclassifier.py
    │       │   ├── plot_filter_out_noise.py
    │       │   ├── plot_implement_denoiser.py
    │       │   ├── plot_signals_and_features.py
    │       │   ├── plot_train_classifier.py
    │       │   ├── plot_train_denoiser.py
    │       │   └── plot_vad_snr_filter.py
    │       ├── exceptions.rst
    │       ├── feats.rst
    │       ├── files.rst
    │       ├── filters.rst
    │       ├── index.rst
    │       ├── model_dataprep.rst
    │       ├── modelsetup.rst
    │       ├── modules.rst
    │       ├── readme.rst
    │       ├── template_models.rst
    │       └── utils.rst
    │   ├── 0.1.0a3
    │       ├── augment.rst
    │       ├── builtin_sp.rst
    │       ├── builtin_spdl.rst
    │       ├── changelog.rst
    │       ├── conf.py
    │       ├── datasets.rst
    │       ├── dsp.rst
    │       ├── example_cases.rst
    │       ├── examples
    │       │   ├── README.txt
    │       │   ├── plot_SNR_add_noise_to_datasets.py
    │       │   ├── plot_augment_sound.py
    │       │   ├── plot_dataset_info_formatting.py
    │       │   ├── plot_extract_augment_train_classifier.py
    │       │   ├── plot_featureprep_denoiser.py
    │       │   ├── plot_featureprep_envclassifier.py
    │       │   ├── plot_filter_out_noise.py
    │       │   ├── plot_implement_denoiser.py
    │       │   ├── plot_signals_and_features.py
    │       │   ├── plot_train_classifier.py
    │       │   ├── plot_train_denoiser.py
    │       │   └── plot_vad_snr_filter.py
    │       ├── exceptions.rst
    │       ├── feats.rst
    │       ├── files.rst
    │       ├── filters.rst
    │       ├── index.rst
    │       ├── model_dataprep.rst
    │       ├── modelsetup.rst
    │       ├── modules.rst
    │       ├── readme.rst
    │       ├── template_models.rst
    │       ├── utils.rst
    │       └── versions.rst
    │   ├── augment.rst
    │   ├── builtin_sp.rst
    │   ├── builtin_spdl.rst
    │   ├── changelog.rst
    │   ├── conf.py
    │   ├── datasets.rst
    │   ├── dsp.rst
    │   ├── example_cases.rst
    │   ├── examples
    │       ├── README.txt
    │       ├── plot_SNR_add_noise_to_datasets.py
    │       ├── plot_augment_sound.py
    │       ├── plot_dataset_info_formatting.py
    │       ├── plot_extract_augment_train_classifier.py
    │       ├── plot_featureprep_denoiser.py
    │       ├── plot_featureprep_envclassifier.py
    │       ├── plot_filter_out_noise.py
    │       ├── plot_implement_denoiser.py
    │       ├── plot_signals_and_features.py
    │       ├── plot_train_classifier.py
    │       ├── plot_train_denoiser.py
    │       └── plot_vad_snr_filter.py
    │   ├── exceptions.rst
    │   ├── feats.rst
    │   ├── files.rst
    │   ├── filters.rst
    │   ├── index.rst
    │   ├── model_dataprep.rst
    │   ├── modelsetup.rst
    │   ├── modules.rst
    │   ├── readme.rst
    │   ├── template_models.rst
    │   ├── utils.rst
    │   └── versions.rst
├── jupyter_notebooks
    ├── augment_sound_machine_learning.ipynb
    ├── filter_out_noise.ipynb
    ├── generate_signals_noise_snr.ipynb
    ├── implement_denoiser.ipynb
    ├── plot_vad_snr_filter.ipynb
    └── speech_noise_SNR.ipynb
├── new_version_updates.md
├── requirements.txt
├── setup.py
├── soundpy
    ├── __init__.py
    ├── __init__.pyc
    ├── augment.py
    ├── builtin.py
    ├── datasets.py
    ├── dsp.py
    ├── exceptions.py
    ├── feats.py
    ├── files.py
    ├── filters.py
    ├── models
    │   ├── __init__.py
    │   ├── builtin.py
    │   ├── dataprep.py
    │   ├── modelsetup.py
    │   ├── plot.py
    │   └── template_models.py
    ├── utils.py
    └── utils.pyc
├── start_jup_env.sh
├── tests
    ├── datasets_test.py
    ├── dsp_test.py
    ├── feats_test.py
    ├── filters_test.py
    ├── inspect_functions.py
    └── utils_test.py
└── tests_requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
 1 | env/
 2 | ve/
 3 | __pycache__/
 4 | saved_features_and_models/
 5 | audiodata/
 6 | images/
 7 | images_1/
 8 | audiodata2/
 9 | audiodata3/
10 | .ipynb_checkpoints/
11 | env2/
12 | env3/
13 | docs/build/
14 | docs/doc_layout.md
15 | docs/Makefile
16 | docs/make.bat
17 | docs/source/auto_examples/
18 | example_dir/
19 | tests/testing_pypi/
20 | test_audio/
21 | compare_augmentations_right/
22 | compare_augmentations_nine/
23 | build/
24 | *.npy
25 | dev_env/
26 | docs/source/examples/example_feats_models/
27 | *.png
28 | example_feats_models/
29 | update_env/
30 | debug_env/
31 | p3_test/
32 | 
33 | 


--------------------------------------------------------------------------------
/Copying.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/Copying.docx


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM tensorflow/tensorflow:2.1.0-gpu-py3
 2 | 
 3 | # Refresh the package index and install libsndfile1 in the same layer so a cached, stale index is never reused.
 4 | RUN apt-get update && apt-get upgrade -y \
 5 |     && apt-get install -y libsndfile1 && rm -rf /var/lib/apt/lists/*
 6 | 
 7 | RUN python -m pip install --upgrade pip
 8 | 
 9 | RUN pip install -U soundfile \
10 |                     librosa \
11 |                     python_speech_features \
12 |                     notebook \
13 |                     matplotlib 
14 |                     
15 | RUN pip install -U scikit-image
16 |                     
17 | RUN mkdir /root/soundpy/
18 | 
19 | WORKDIR /root/soundpy/
20 | 


--------------------------------------------------------------------------------
/GNU_AGPL_full.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/GNU_AGPL_full.docx


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | ## AGPL-3.0 License
 2 | 
 3 | Copyright (c) 2020, Aislyn Rose.
 4 | 
 5 | Permission to use, copy, modify, and/or distribute this software is granted
 6 | under the terms of the GNU Affero General Public License as published by the
 7 | <a href="http://fsf.org">Free Software Foundation</a>, either version 3 of the License, or (at your option) 
 8 | any later version.
 9 | 
10 | The SoundPy framework is distributed in the hope that it will be useful, but 
11 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more 
13 | details. 
14 | 


--------------------------------------------------------------------------------
/THIRD-PARTY-NOTICES.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/THIRD-PARTY-NOTICES.docx


--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ## Current
 3 | - make it easier to use / build different models
 4 | - implement autoencoder model
 5 | - implement denoising with autoencoder model
 6 | - build autoencoder in keras
 7 | - build autoencoder in pytorch
 8 | - build via docker image
 9 | 
10 | ## Functionality
11 | 
12 | - autoencoder training
13 | - get postfilter to work on spectral subtraction
14 | - set power_scale default to 'power_to_db'?
15 | - functions to use librosa or not to perform tasks (librosa doesn't work on notebooks.ai for example)
16 | - measure level of snr
17 | - measure quality of filtering/speech enhancement
18 | - measure signal similarity
19 | - source separation
20 | - gender switch
21 | - text to speech
22 | - speech to text
23 | - dataset exploration (visualize 10 random samples/ based on size?, etc.)
24 | - simple inclusion of noise reduction into training models
25 | - pysoundtool and pysoundtool.online version? (use librosa vs no librosa)
26 | 
27 | ## Presentation
28 | 
29 | - blog post on each set of functionalities
30 | - presentation of examples
31 | - get documentation online
32 | - simplify functions
33 | - improve documentation (references, examples, testing, data shapes!!, help options)
34 | 
35 | ## Testing
36 | 
37 | - expand test cases
38 | - efficiency of code
39 | 
40 | ## Organization
41 | 
42 | - reorganize based on use... how import statement should work
43 | - make sample_rate, samprate, samplingrate, sr namespace consistent
44 | - make features/feature_type namespace consistent
45 | - use keyword arguments for librosa and scipy?
46 | - simplify
47 | 
48 | 
49 | ## Organization ideas:
50 | 
51 | pyst.loadsound(audiofile, sr)
52 | pyst.playsound(audiofile, sr)?
53 | pyst.plotsound(audiofile, sr, feature_type)
54 | 
55 | pyst.data.train_val_test(input_data, output_data)
56 | pyst.data.analyze(audio_dir)? For example for audio types, lengths?, sizes? etc. Useful for logging?
57 | pyst.feats.plot()
58 | pyst.feats.hear()
59 | pyst.feats.extract()
60 | model = pyst.models.speechrec_simple() # model will be a class instance..
61 | history = pyst.models.train(model, train_path, val_path)
62 | matplotlib.pyplot.plot(history) ?
63 | pyst.models.plot(history)
64 | pyst.models.run(model, test_path)
65 | 
66 | pyst.filters.wiener()
67 | pyst.filters.bandsubtraction()
68 | pyst.models.soundclassifier()
69 | pyst.models.autoencoder_denoise()
70 | pyst.models.speechrec()
71 | 


--------------------------------------------------------------------------------
/audiodata/background_samples/README.md:
--------------------------------------------------------------------------------
 1 | ## Background Noise Examples 
 2 | 
 3 | These sounds were downloaded from freesound.org and are licensed under the Creative Commons 0 License. 
 4 | 
 5 | They have been limited to 10 seconds and the sample rate reduced to 16 kHz to reduce their sizes.
 6 | 
 7 | ### cafe.wav
 8 | 
 9 | 387030__antonybk__cafe-takk-northern-quarter-manchester.wav
10 | 
11 | ### traffic.wav
12 | 
13 | 261344__ivolipa__city-traffic-day.wav
14 | 
15 | ### fridge.wav
16 | 
17 | 237399__squareal__fridge-tone.wav
18 | 


--------------------------------------------------------------------------------
/audiodata/background_samples/cafe.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/background_samples/cafe.wav


--------------------------------------------------------------------------------
/audiodata/background_samples/fridge.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/background_samples/fridge.wav


--------------------------------------------------------------------------------
/audiodata/background_samples/traffic.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/background_samples/traffic.wav


--------------------------------------------------------------------------------
/audiodata/car_horn.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/car_horn.wav


--------------------------------------------------------------------------------
/audiodata/models/denoiser/example_denoiser_stft.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/models/denoiser/example_denoiser_stft.h5


--------------------------------------------------------------------------------
/audiodata/models/denoiser/log_extraction_settings.csv:
--------------------------------------------------------------------------------
 1 | dur_sec,3
 2 | feature_type,stft noisy
 3 | feat_type,stft
 4 | complex_vals,True
 5 | sr,22050
 6 | num_feats,177
 7 | n_fft,352
 8 | win_size_ms,16
 9 | frame_length,352
10 | percent_overlap,0.5
11 | window,hann
12 | frames_per_sample,11
13 | labeled_data,False
14 | visualize,True
15 | input_shape,"(35, 11, 177)"
16 | desired_shape,"(385, 177)"
17 | use_librosa,True
18 | center,True
19 | mode,reflect
20 | subsection_data,True
21 | divide_factor,10
22 | 


--------------------------------------------------------------------------------
/audiodata/python.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/python.wav


--------------------------------------------------------------------------------
/build_aju_image.sh:
--------------------------------------------------------------------------------
1 | # chmod u+x build_aju_image.sh
2 | 
3 | docker build . -t aju
4 | 


--------------------------------------------------------------------------------
/doc_requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx-rtd-theme
2 | sphinx-gallery
3 | numpydoc
4 | pillow
5 | ipython
6 | pandas
7 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/augment.rst:
--------------------------------------------------------------------------------
1 | 
2 | Augment audio data
3 | ------------------
4 | 
5 | .. automodule:: soundpy.augment
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/builtin_sp.rst:
--------------------------------------------------------------------------------
1 | 
2 | Built-In Functionality (non Deep Learning)
3 | ------------------------------------------
4 | 
5 | .. automodule:: soundpy.builtin
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/builtin_spdl.rst:
--------------------------------------------------------------------------------
1 | 
2 | Built-In Functionality (Deep Learning)
3 | --------------------------------------
4 | 
5 | .. automodule:: soundpy.models.builtin
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/changelog.rst:
--------------------------------------------------------------------------------
 1 | *********
 2 | Changelog
 3 | *********
 4 | 
 5 | v0.1.0a
 6 | =======
 7 | 
 8 | v0.1.0a2
 9 | --------
10 | 2020-08-13
11 | 
12 | 
13 | Bug fixes
14 |    -  added `use_beg_ms` parameter in `soundpy.dsp.vad`: improved VAD recognition of silences post speech.
15 | 
16 | Features
17 |    -  added GPU option: provide instructions and Docker image for running SoundPy with GPU
18 |    -  added `extend_window_ms` parameter to `soundpy.feats.get_vad_samples` and `soundpy.feats.get_vad_stft`:  can extend VAD window if desired. Useful in higher SNR environments.
19 |    -  added `soundpy.feats.get_samples_clipped` and `soundpy.feats.get_stft_clipped` to clip off beginning and ending silences.
20 |    -  added `beg_end_clipped` parameter to `soundpy.feats.plot_vad` to visualize VAD by clipping the beginning and ending silences (if True) or VAD instances throughout the signal (if False).
21 |    -  added `soundpy.models.dataprep.GeneratorFeatExtraction` class for extracting and augmenting features during training (still experimental).
22 |    -  added `soundpy.models.builtin.envclassifier_extract_train` as an example of extracting and augmenting features during training (still experimental).
23 |    -  added `soundpy.dsp.clip_at_zero` to enable smoother concatenations of signals and removal of clicks at the beginning and ending of signals.
24 |    -  added `soundpy.dsp.remove_dc_bias` to enable smoother concatenations of signals
25 |    -  added and set `remove_dc` parameter to True in `soundpy.files.loadsound` and `soundpy.files.savesound` to ensure signals all have mean zero.
26 |    -  added `mirror_sound` option to `soundpy.dsp.apply_sample_length` as a way to extend sound.
27 |    -  added `soundpy.dsp.ismono` to check if samples were mono or stereo.
28 |    -  added `soundpy.dsp.average_channels` to average sample amplitudes across channels, e.g. to identify where high energy begins / ends in the signal without disregarding additional channels (if stereo sound).
29 |    -  added `soundpy.dsp.add_channels` for adding additional channels if needed (e.g. for applying a 'hann' or 'hamming' window to stereo sound)
30 |    -  added stereo sound functionality to `soundpy.dsp.add_backgroundsound`, `soundpy.dsp.clip_at_zero`, `soundpy.dsp.calc_fft`, `soundpy.feats.get_stft`, `soundpy.feats.get_vad_stft` 
31 |    
32 | 
33 | Other changes
34 |    -  name change: from pysoundtool to soundpy: simpler
35 |    -  updated dependencies to newest versions still compatible with Tensorflow 2.1.0
36 |    -  moved `soundpy.dsp.get_vad_samples` to `soundpy.feats.get_vad_samples`
37 |    -  moved `soundpy.dsp.get_vad_stft` to `soundpy.feats.get_vad_stft`
38 |    -  name change: allow `soundpy.feats.normalize` to be used as `soundpy.normalize`
39 |    -  removed `pysoundtool_online` and mybinder button as maintaining the online version was not easily done. Aim to reimplement at some point.
40 |    
41 | 
42 | 
43 | v0.1.0a1
44 | --------
45 | 
46 | Initial public alpha release.
47 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/datasets.rst:
--------------------------------------------------------------------------------
1 | 
2 | Organizing datasets
3 | -------------------
4 | 
5 | .. automodule:: soundpy.datasets
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/dsp.rst:
--------------------------------------------------------------------------------
1 | 
2 | Working with signals
3 | --------------------
4 | 
5 | .. automodule:: soundpy.dsp
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/example_cases.rst:
--------------------------------------------------------------------------------
1 | .. toctree::
2 |    :maxdepth: 2
3 | 
4 | .. include:: auto_examples/index.rst
5 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/README.txt:
--------------------------------------------------------------------------------
1 |  
2 | -----------------------------
3 | SoundPy Example Use Cases
4 | -----------------------------
5 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_SNR_add_noise_to_datasets.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # coding: utf-8
  3 | """
  4 | ==========================================
  5 | Add Noise to Speech at Specific SNR Levels
  6 | ==========================================
  7 | 
  8 | Add noise to speech at specific signal-to-noise ratio levels.
  9 | 
 10 | To see how soundpy implements this, see `soundpy.dsp.add_backgroundsound`.
 11 | """
 12 | 
 13 | 
 14 | ###############################################################################################
 15 | # 
 16 | 
 17 | 
 18 | #####################################################################
 19 | # Let's import soundpy, and ipd for playing audio data
 20 | import soundpy as sp
 21 | import IPython.display as ipd
 22 | 
 23 | 
 24 | ######################################################
 25 | # Define the speech and noise data samples
 26 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 27 | 
 28 | ######################################################
 29 | # I will use speech and noise data from the soundpy repo.
 30 | 
 31 | ##########################################################
 32 | # Designate path relevant for accessing audiodata
 33 | sp_dir = '../../../'
 34 | 
 35 | ##########################################################
 36 | # Speech sample:
 37 | speech_sample = '{}audiodata/python.wav'.format(sp_dir)
 38 | speech_sample = sp.utils.string2pathlib(speech_sample)
 39 | # as pathlib object, can do the following: 
 40 | word = speech_sample.stem
 41 | word
 42 | 
 43 | ##########################################################
 44 | # Noise sample:
 45 | noise_sample = '{}audiodata/background_samples/cafe.wav'.format(sp_dir)
 46 | noise_sample = sp.utils.string2pathlib(noise_sample)
 47 | # as pathlib object, can do the following: 
 48 | noise = noise_sample.stem
 49 | noise
 50 | 
 51 | 
 52 | ##########################################################
 53 | # Hear Clean Speech
 54 | # ~~~~~~~~~~~~~~~~~
 55 | # I'm using a higher sample rate here as calculating SNR 
 56 | # performs best upwards of 44100 Hz.
 57 | sr = 44100
 58 | s, sr = sp.loadsound(speech_sample, sr = sr)
 59 | ipd.Audio(s,rate=sr)
 60 | 
 61 | 
 62 | ##########################################################
 63 | # Hear Noise
 64 | # ~~~~~~~~~~
 65 | n, sr = sp.loadsound(noise_sample, sr = sr)
 66 | ipd.Audio(n,rate=sr)
 67 | 
 68 | 
 69 | ##########################################################
 70 | # Hear Signal-to-Noise Ratio 20
 71 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 72 | noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound(
 73 |     speech_sample,
 74 |     noise_sample,
 75 |     sr = sr,
 76 |     snr = 20)
 77 | ipd.Audio(noisyspeech_20snr,rate=sr)
 78 | 
 79 | ########################################################
 80 | # `snr20` is simply the measured SNR post adjustment of the noise signal.
 81 | # This is useful to check that the indicated snr is at least close
 82 | # to the resulting snr.
 83 | snr20
 84 | 
 85 | ##########################################################
 86 | # Hear Signal-to-Noise Ratio 5
 87 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 88 | noisyspeech_5snr, snr5 = sp.dsp.add_backgroundsound(
 89 |     speech_sample,
 90 |     noise_sample,
 91 |     sr = sr,
 92 |     snr = 5)
 93 | ipd.Audio(noisyspeech_5snr,rate=sr)
 94 | 
 95 | #########################################################
 96 | snr5
 97 | 
 98 | ######################################################################
 99 | # Visualize the Audio Samples
100 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^
101 | 
102 | ######################################################################
103 | # See Clean Speech (raw signal)
104 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
105 | sp.plotsound(speech_sample, feature_type='signal', 
106 |                sr = sr, title = 'Speech: ' + word.upper())
107 | 
108 | ######################################################################
109 | # See Clean Speech (stft)
110 | # ~~~~~~~~~~~~~~~~~~~~~~~
111 | sp.plotsound(speech_sample, feature_type='stft', 
112 |                sr = sr, title = 'Speech: ' + word.upper())
113 | 
114 | ###################################################################### See Noise (raw signal)
115 | # ~~~~~~~~~~~~~~~~~~~~~~
116 | sp.plotsound(noise_sample, feature_type='signal',
117 |                title = 'Noise: ' + noise.upper())
118 | 
119 | ###################################################################### See Noise (stft)
120 | # ~~~~~~~~~~~~~~~~
121 | sp.plotsound(noise_sample, feature_type='stft',
122 |                title = 'Noise: ' + noise.upper())
123 | 
124 | ######################################################################
125 | # See Noisy Speech: SNR 20 (raw signal)
126 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
127 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal',
128 |                title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper()))
129 | 
130 | ######################################################################
131 | # See Noisy Speech: SNR 20 (stft)
132 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
133 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'stft',
134 |                title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper()))
135 | 
136 | ######################################################################
137 | # See Noisy Speech: SNR 5 (raw signal)
138 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
139 | sp.plotsound(noisyspeech_5snr, sr = sr, feature_type = 'signal',
140 |                title = '"{}" with {} noise at SNR 5'.format(word.upper(), noise.upper()))
141 | 
142 | ######################################################################
143 | # See Noisy Speech: SNR 5 (stft)
144 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
145 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'stft',
146 |                title = '"{}" with {} noise at SNR 5'.format(word.upper(), noise.upper()))
147 | 
148 | ######################################################################
149 | # Make Combined Sound Longer
150 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^
151 | 
152 | ##########################################################
153 | # Pad Speech and Set Total Length 
154 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
155 | noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound(
156 |     speech_sample,
157 |     noise_sample,
158 |     sr = sr,
159 |     snr = 20,
160 |     pad_mainsound_sec = 1,
161 |     total_len_sec = 4)
162 | 
163 | ##########################################################
164 | ipd.Audio(noisyspeech_20snr,rate=sr)
165 | 
166 | ##########################################################
167 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal',
168 |                title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper()))
169 | 
170 | 
171 | ######################################################################
172 | # Make Combined Sound Shorter
173 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^
174 | 
175 | ##########################################################
176 | # Set Total Length
177 | # ~~~~~~~~~~~~~~~~
178 | noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound(
179 |     speech_sample,
180 |     noise_sample,
181 |     sr = sr,
182 |     snr = 20,
183 |     total_len_sec = 0.5)
184 | 
185 | ##########################################################
186 | ipd.Audio(noisyspeech_20snr,rate=sr)
187 | 
188 | ##########################################################
189 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal',
190 |                title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper()))
191 | 
192 | ######################################################################
193 | # Wrap the Background Sound
194 | # ^^^^^^^^^^^^^^^^^^^^^^^^^
195 | noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound(
196 |     speech_sample,
197 |     noise_sample,
198 |     sr = sr,
199 |     snr = 20,
200 |     wrap = True,
201 |     pad_mainsound_sec = 2,
202 |     total_len_sec = 5)
203 | 
204 | ##########################################################
205 | ipd.Audio(noisyspeech_20snr,rate=sr)
206 | 
207 | ##########################################################
208 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal',
209 |                title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper()))
210 | 
211 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_dataset_info_formatting.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | """
 3 | ========================================
 4 | Audio Dataset Exploration and Formatting
 5 | ========================================
 6 | 
 7 | Examine audio files within a dataset, and reformat them if desired.  
 8 | 
 9 | To see how soundpy implements this, see `soundpy.builtin.dataset_logger` and 
10 | `soundpy.builtin.dataset_formatter`.
11 | """
12 | 
13 | #####################################################################
14 | # Let's import soundpy 
15 | import soundpy as sp
16 | 
17 | ###############################################################################################
18 | #  
19 | # Dataset Exploration
20 | # ^^^^^^^^^^^^^^^^^^^
21 | 
22 | ##########################################################
23 | # Designate path relevant for accessing audiodata
24 | sp_dir = '../../../'
25 | 
26 | ##########################################################
27 | # I will explore files in a small dataset on my computer with varying file formats.
28 | dataset_path = '{}audiodata2/'.format(sp_dir)
29 | dataset_info_dict = sp.builtin.dataset_logger('{}audiodata2/'.format(sp_dir));
30 | 
31 | #########################################################################
32 | # This returns our data in a dictionary, perfect for exploring via Pandas
33 | import pandas as pd
34 | all_data = pd.DataFrame(dataset_info_dict).T
35 | all_data.head()
36 | 
37 | ###################################
38 | # Let's have a look at the audio files and how uniform they are:
39 | print('formats: ', all_data.format_type.unique())
40 | print('bitdepth (types): ', all_data.bitdepth.unique())
41 | print('mean duration (sec): ', all_data.dur_sec.mean())
42 | print('std dev duration (sec): ', all_data.dur_sec.std())
43 | print('min sample rate: ', all_data.sr.min())
44 | print('max sample rate: ', all_data.sr.max())
45 | print('number of channels: ', all_data.num_channels.unique())
46 | 
47 | 
48 | ##########################################################
49 | # For a visual example, let's plot the count of various sample rates. (48000 Hz is high definition sound, 16000 Hz is wideband, and 8000 Hz is narrowband, similar to how speech sounds on the telephone.)
50 | all_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts')
51 | 
52 | ###############################################################################################
53 | # Reformat a Dataset
54 | # ^^^^^^^^^^^^^^^^^^
55 | 
56 | ##############################################################
57 | # Let's say we have a dataset that we want to make consistent. 
58 | # We can do that with soundpy
59 | new_dataset_dir = sp.builtin.dataset_formatter(
60 |     dataset_path, 
61 |     recursive = True, # we want all the audio, even in nested directories
62 |     format='WAV',
63 |     bitdepth = 16, # if set to None, a default bitdepth will be applied
64 |     sr = 8000, # narrowband
65 |     mono = True, # ensure data all have 1 channel
66 |     dur_sec = 3, # audio will be limited to 3 seconds
67 |     zeropad = True, # audio shorter than 3 seconds will be zeropadded
68 |     new_dir = './example_dir/', # if None, a time-stamped directory will be created for you
69 |     overwrite = False # can set to True if you want to overwrite files
70 |     );
71 |         
72 | ###############################################
73 | # Let's see what the audio data looks like now:
74 | dataset_formatted_dict = sp.builtin.dataset_logger(new_dataset_dir, recursive=True);
75 | formatted_data = pd.DataFrame(dataset_formatted_dict).T
76 | 
77 | #####################
78 | formatted_data.head()
79 | 
80 | ###################################
81 | print('audio formats: ', formatted_data.format_type.unique())
82 | print('bitdepth (types): ', formatted_data.bitdepth.unique())
83 | print('mean duration (sec): ', formatted_data.dur_sec.mean())
84 | print('std dev duration (sec): ', formatted_data.dur_sec.std())
85 | print('min sample rate: ', formatted_data.sr.min())
86 | print('max sample rate: ', formatted_data.sr.max())
87 | print('number of channels: ', formatted_data.num_channels.unique())
88 | 
89 | ##########################################################
90 | # Now all the audio data is sampled at the same rate: 8000 Hz
91 | formatted_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts')
92 | 
93 | ###########################################
94 | # There we go! 
95 | # You can reformat only parts of the audio files, e.g. format or bitdepth.
96 | # If you leave parameters in sp.builtin.dataset_formatter as None, the original
97 | # settings of the audio file will be maintained, except for bitdepth: a default
98 | # bitdepth will be applied according to the format of the file; see `soundfile.default_subtype`.
99 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_featureprep_denoiser.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | """
 3 | =======================================================
 4 | Feature Extraction for Denoising: Clean and Noisy Audio
 5 | =======================================================
 6 | 
 7 | Extract acoustic features from clean and noisy datasets for 
 8 | training a denoising model, e.g. a denoising autoencoder.
 9 | 
10 | To see how soundpy implements this, see `soundpy.builtin.denoiser_feats`.
11 | """
12 | 
13 | 
14 | ###############################################################################################
15 | # 
16 | 
17 | #####################################################################
18 | import soundpy as sp
19 | import IPython.display as ipd
20 | 
21 | ######################################################
22 | # Prepare for Extraction: Data Organization
23 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
24 | 
25 | ######################################################
26 | # I will use a mini denoising dataset as an example
27 | 
28 | # Example noisy data:
29 | data_noisy_dir = '/home/airos/Projects/Data/denoising/uwnu/noisy'
30 | # Example clean data:
31 | data_clean_dir = '/home/airos/Projects/Data/denoising/uwnu/clean/'
32 | # Where to save extracted features:
33 | data_features_dir = './audiodata/example_feats_models/denoiser/'
34 | 
35 | ######################################################
36 | # Choose Feature Type 
37 | # ~~~~~~~~~~~~~~~~~~~
38 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'.
39 | # If you are working with speech, I suggest 'fbank', 'powspec', or 'stft'.
40 | 
41 | feature_type = 'stft'
42 | sr = 22050
43 | 
44 | ######################################################
45 | # Set Duration of Audio 
46 | # ~~~~~~~~~~~~~~~~~~~~~
47 | # How much audio, in seconds, to use from each audio file.
48 | # The speech samples are about 3 seconds long.
49 | dur_sec = 3
50 | 
51 | ######################################################
52 | # Set Context Window / Number of Frames
53 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
54 | # How many sections should each sample be broken into? (optional)
55 | # Some research papers include a 'context window' or the like, 
56 | # which this refers to.
57 | frames_per_sample = 11
58 | 
59 | #######################################################################
60 | # Option 1: Built-In Functionality: soundpy does everything for you
61 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
62 | 
63 | ############################################################
64 | # Define which data to use and which features to extract. 
65 | # NOTE: because the dataset is very small, we will set
66 | # `perc_train` to a lower level than 0.8. (Otherwise, an error will be raised.)
67 | # Everything else is based on defaults. A feature folder with
68 | # the feature data will be created in the current working directory.
69 | # (Although, you can set this under the parameter `data_features_dir`)
70 | # `visualize` saves periodic images of the features extracted.
71 | # This is useful if you want to know what's going on during the process.
72 | perc_train = 0.6 # with larger datasets this would be around 0.8
73 | extraction_dir = sp.denoiser_feats(
74 |     data_clean_dir = data_clean_dir, 
75 |     data_noisy_dir = data_noisy_dir,
76 |     sr = sr,
77 |     feature_type = feature_type, 
78 |     dur_sec = dur_sec,
79 |     frames_per_sample = frames_per_sample,
80 |     perc_train = perc_train,
81 |     limit = 200,
82 |     visualize=True);
83 | extraction_dir
84 | 
85 | ################################################################
86 | # The extracted features, extraction settings applied, and 
87 | # which audio files were assigned to which datasets
88 | # will be saved in the `extraction_dir` directory
89 | 
90 | 
91 | ############################################################
92 | # And that's it!
93 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_featureprep_envclassifier.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | """
 3 | =====================================
 4 | Feature Extraction for Classification
 5 | =====================================
 6 | 
 7 | Extract acoustic features from labeled data for 
 8 | training an environment or speech classifier.
 9 | 
10 | To see how soundpy implements this, see `soundpy.builtin.envclassifier_feats`.
11 | """
12 | 
13 | 
14 | ###############################################################################################
15 | # 
16 | 
17 | 
18 | #####################################################################
19 | import os, sys
20 | import inspect
21 | currentdir = os.path.dirname(os.path.abspath(
22 |     inspect.getfile(inspect.currentframe())))
23 | parentdir = os.path.dirname(currentdir)
24 | parparentdir = os.path.dirname(parentdir)
25 | packagedir = os.path.dirname(parparentdir)
26 | sys.path.insert(0, packagedir)
27 | 
28 | import matplotlib.pyplot as plt
29 | import soundpy as sp 
30 | import IPython.display as ipd
31 | package_dir = '../../../'
32 | os.chdir(package_dir)
33 | sp_dir = package_dir
34 | ######################################################
35 | # Prepare for Extraction: Data Organization
36 | # -----------------------------------------
37 | 
38 | ######################################################
39 | # I will use a sample speech commands data set:
40 | 
41 | ##########################################################
42 | # Designate path relevant for accessing audiodata
43 | data_dir = '/home/airos/Projects/Data/sound/speech_commands_small_section/'
44 | 
45 | ######################################################
46 | # Choose Feature Type 
47 | # ~~~~~~~~~~~~~~~~~~~
48 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'.
49 | # If you are working with speech, I suggest 'fbank', 'powspec', or 'stft'.
50 | 
51 | feature_type = 'fbank'
52 | 
53 | ######################################################
54 | # Set Duration of Audio 
55 | # ~~~~~~~~~~~~~~~~~~~~~
56 | # How much audio, in seconds, to use from each audio file.
57 | # The example noise and speech files are only 1 second long
58 | dur_sec = 1
59 | 
60 | 
61 | #############################################################
62 | # Built-In Functionality - soundpy extracts the features for you
63 | # ----------------------------------------------------------------------------
64 | 
65 | ############################################################
66 | # Define which data to use and which features to extract
67 | # Everything else is based on defaults. A feature folder with
68 | # the feature data will be created in the current working directory.
69 | # (Although, you can set this under the parameter `data_features_dir`)
70 | # `visualize` saves periodic images of the features extracted.
71 | # This is useful if you want to know what's going on during the process.
72 | extraction_dir = sp.envclassifier_feats(data_dir, 
73 |                                           feature_type=feature_type, 
74 |                                           dur_sec=dur_sec,
75 |                                           visualize=True);
76 | 
77 | ################################################################
78 | # The extracted features, extraction settings applied, and 
79 | # which audio files were assigned to which datasets
80 | # will be saved in the following directory:
81 | extraction_dir
82 | 
83 | ############################################################
84 | # And that's it!
85 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_filter_out_noise.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # coding: utf-8
  3 | """
  4 | ===========================
  5 | Filter Out Background Noise
  6 | ===========================
  7 | 
  8 | Filter out background noise from noisy speech signals. 
  9 | 
 10 | To see how soundpy implements this, see `soundpy.builtin.filtersignal`.
 11 | 
 12 | As a general note for filtering, the Wiener Filter is the default filter for soundpy. It seems to filter signals more consistently than the Band Spectral Subtraction Filter.
 13 | """
 14 | 
 15 | 
 16 | ###############################################################################################
 17 | # 
 18 | 
 19 | 
 20 | #####################################################################
 21 | 
 22 | # Let's import soundpy, and ipd for playing audio data
 23 | import soundpy as sp
 24 | import IPython.display as ipd
 25 | 
 26 | 
 27 | ######################################################
 28 | # Define the noisy and clean speech audio files.
 29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 30 | # Note: these files are available in the soundpy repo.
 31 | # Designate path relevant for accessing audiodata
 32 | sp_dir = '../../../'
 33 | 
 34 | ##########################################################
 35 | # Noise sample:
 36 | noise = '{}audiodata/background_samples/traffic.wav'.format(sp_dir)
 37 | noise = sp.string2pathlib(noise)
 38 | speech = '{}audiodata/python.wav'.format(sp_dir)
 39 | speech = sp.utils.string2pathlib(speech)
 40 | 
 41 | ##########################################################
 42 | # For filtering, we will set the sample rate to be quite high:
 43 | sr = 48000
 44 | 
 45 | ##########################################################
 46 | # Create a noisy speech signal at an SNR of 10
 47 | noisy, snr_measured = sp.dsp.add_backgroundsound(
 48 |     speech, 
 49 |     noise, 
 50 |     sr = sr, 
 51 |     snr = 10, 
 52 |     total_len_sec = 3, 
 53 |     pad_mainsound_sec = 0.75)
 54 | 
 55 | ##########################################################
 56 | # Hear and see the noisy speech 
 57 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 58 | 
 59 | ipd.Audio(noisy,rate=sr)
 60 | 
 61 | ##########################################################
 62 | sp.plotsound(noisy, sr=sr, feature_type='signal', 
 63 |                title='Noisy Speech ')
 64 | 
 65 | 
 66 | ##########################################################
 67 | # Hear and see the clean speech 
 68 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 69 | s, sr = sp.loadsound(speech, sr=sr)
 70 | ipd.Audio(s,rate=sr)
 71 | 
 72 | ##########################################################
 73 | sp.plotsound(s, sr=sr, feature_type='signal', 
 74 |                title='Clean Speech ')
 75 | 
 76 | 
 77 | ##########################################################
 78 | # Filter the noisy speech
 79 | # ^^^^^^^^^^^^^^^^^^^^^^^
 80 | 
 81 | ##########################################################
 82 | # Wiener Filter 
 83 | # ~~~~~~~~~~~~~
 84 | 
 85 | ##########################################################
 86 | # Let's filter with a Wiener filter:
 87 | noisy_wf, sr = sp.filtersignal(noisy,
 88 |                                  sr=sr,
 89 |                                  filter_type='wiener') # default
 90 | 
 91 | ##########################################################
 92 | ipd.Audio(noisy_wf,rate=sr)
 93 | 
 94 | ##########################################################
 95 | sp.plotsound(noisy_wf, sr=sr, feature_type='signal', 
 96 |                title='Noisy Speech: Wiener Filter')
 97 | 
 98 | #################################################################
 99 | # Wiener Filter with Postfilter
100 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101 | 
102 | ##########################################################
103 | # Let's filter with a Wiener filter and postfilter
104 | noisy_wfpf, sr = sp.filtersignal(noisy,
105 |                                  sr=sr,
106 |                                  filter_type='wiener',
107 |                                  apply_postfilter = True) 
108 | 
109 | ##########################################################
110 | ipd.Audio(noisy_wfpf,rate=sr)
111 | 
112 | ##########################################################
113 | sp.plotsound(noisy_wfpf, sr=sr, feature_type='signal', 
114 |                title='Noisy Speech: Wiener Filter with Postfilter')
115 | 
116 | #################################################################
117 | # Band Spectral Subtraction
118 | # ~~~~~~~~~~~~~~~~~~~~~~~~~
119 | 
120 | ##########################################################
121 | # Let's filter using band spectral subtraction
122 | noisy_bs, sr = sp.filtersignal(noisy,
123 |                                  sr=sr,
124 |                                  filter_type='bandspec') 
125 | 
126 | ##########################################################
127 | ipd.Audio(noisy_bs,rate=sr)
128 | 
129 | ##########################################################
130 | sp.plotsound(noisy_bs, sr=sr, feature_type='signal', 
131 |                title='Noisy Speech: Band Spectral Subtraction')
132 | 
133 | 
134 | #################################################################
135 | # Band Spectral Subtraction with Postfilter
136 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
137 | 
138 | #########################################################################
139 | # Finally, let's filter using band spectral subtraction with a postfilter
140 | noisy_bspf, sr = sp.filtersignal(noisy,
141 |                                  sr=sr,
142 |                                  filter_type='bandspec', 
143 |                                  apply_postfilter = True) 
144 | 
145 | ##########################################################
146 | ipd.Audio(noisy_bspf,rate=sr)
147 | 
148 | ##########################################################
149 | sp.plotsound(noisy_bspf, sr=sr, feature_type='signal', 
150 |                title='Noisy Speech: Band Spectral Subtraction with Postfilter')
151 | 
152 | 
153 | ##########################################################
154 | # Filter: increase the scale
155 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^
156 | 
157 | ##########################################################
158 | # Let's filter with a Wiener filter:
159 | filter_scale = 5
160 | noisy_wf, sr = sp.filtersignal(noisy,
161 |                                  sr=sr,
162 |                                  filter_type='wiener',
163 |                                  filter_scale = filter_scale)
164 | 
165 | ##########################################################
166 | # Wiener Filter
167 | # ~~~~~~~~~~~~~
168 | 
169 | ##########################################################
170 | ipd.Audio(noisy_wf,rate=sr)
171 | 
172 | ##########################################################
173 | sp.plotsound(noisy_wf, sr=sr, feature_type='signal', 
174 |                title='Noisy Speech: Wiener Filter Scale {}'.format(filter_scale))
175 | 
176 | #################################################################
177 | # Wiener Filter with Postfilter
178 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
179 | 
180 | ##########################################################
181 | # Let's filter with a Wiener filter and postfilter
182 | noisy_wfpf, sr = sp.filtersignal(noisy,
183 |                                  sr=sr,
184 |                                  filter_type='wiener',
185 |                                  apply_postfilter = True,
186 |                                  filter_scale = filter_scale) 
187 | 
188 | ##########################################################
189 | ipd.Audio(noisy_wfpf,rate=sr)
190 | 
191 | ##########################################################
192 | sp.plotsound(noisy_wfpf, sr=sr, feature_type='signal', 
193 |                title='Noisy Speech: Wiener Filter with Postfilter Scale {}'.format(filter_scale))
194 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_implement_denoiser.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =================================
  4 | Implement a Denoising Autoencoder
  5 | =================================
  6 | 
  7 | Implement denoising autoencoder to denoise a noisy speech signal.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_run`.
 10 | """
 11 | 
 12 | 
 13 | ############################################################################################
 14 | # 
 15 | 
 16 | #####################################################################
 17 | # Let's import soundpy and other packages
 18 | import soundpy as sp
 19 | import numpy as np
 20 | # for playing audio in this notebook:
 21 | import IPython.display as ipd
 22 | 
 23 | #####################################################################
 24 | # As well as the deep learning component of soundpy
 25 | from soundpy import models as spdl
 26 | 
 27 | ######################################################
 28 | # Prepare for Implementation: Data Organization
 29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 30 | 
 31 | ##########################################################
 32 | # Set path relevant for audio data for this example
 33 | sp_dir = '../../../'
 34 | 
 35 | ######################################################
 36 | # Set model pathway
 37 | # ~~~~~~~~~~~~~~~~~
 38 | # Currently, this expects a model saved with weights, with a .h5 extension.
 39 | # (See `model` below)
 40 | 
 41 | ######################################################
 42 | # The soundpy repo offers a pre-trained denoiser, which we'll use.
 43 | model = '{}audiodata/models/'.format(sp_dir)+\
 44 |     'denoiser/example_denoiser_stft.h5'
 45 | # ensure is a pathlib.PosixPath object
 46 | print(model)
 47 | model = sp.utils.string2pathlib(model)
 48 | model_dir = model.parent
 49 | 
 50 | #########################################################
 51 | # What is in this folder?
 52 | files = list(model_dir.glob('*.*'))
 53 | for f in files:
 54 |     print(f.name)
 55 |   
 56 | ######################################################
 57 | # Provide dictionary with feature extraction settings
 58 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 59 | 
 60 | #########################################################
 61 | # If soundpy extracts features for you, a 'log_extraction_settings.csv' 
 62 | # file will be saved, which includes relevant feature settings for implementing 
 63 | # the model; see `soundpy.feats.save_features_datasets`
 64 | feat_settings = sp.utils.load_dict(
 65 |     model_dir.joinpath('log_extraction_settings.csv'))
 66 | for key, value in feat_settings.items():
 67 |     print(key, ' --> ', value)
 68 |     # change objects that were string to original format
 69 |     import ast
 70 |     try:
 71 |         feat_settings[key] = ast.literal_eval(value)
 72 |     except ValueError:
 73 |         pass
 74 |     except SyntaxError:
 75 |         pass
 76 | 
 77 | #########################################################
 78 | # For the purposes of plotting, let's use some of the settings defined:
 79 | feature_type = feat_settings['feature_type']
 80 | sr = feat_settings['sr']
 81 | 
 82 | ######################################################
 83 | # Provide new audio for the denoiser to denoise!
 84 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 85 | 
 86 | #########################################################
 87 | # We'll use sample speech from the soundpy repo:
 88 | speech = sp.string2pathlib('{}audiodata/python.wav'.format(sp_dir))
 89 | s, sr = sp.loadsound(speech, sr=sr)
 90 | 
 91 | #########################################################
 92 | # Let's add some white noise (10 SNR)
 93 | s_n = sp.augment.add_white_noise(s, sr=sr, snr=10)
 94 | 
 95 | ##############################################################
 96 | # What does the noisy audio sound like?
 97 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 98 | ipd.Audio(s_n,rate=sr)
 99 | 
100 | ##############################################################
101 | # What does the noisy audio look like?
102 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
103 | sp.plotsound(s_n, sr = sr, feature_type='signal')
104 | 
105 | ##############################################################
106 | # What does the clean audio sound like?
107 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
108 | ipd.Audio(s,rate=sr)
109 | 
110 | ##############################################################
111 | # What does the clean audio look like?
112 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
113 | sp.plotsound(s, sr = sr, feature_type='signal')
114 | 
115 | #########################################################################
116 | # Built-In Denoiser Functionality
117 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
118 | 
119 | ##############################################################
120 | # We just need to feed the model path, the noisy audio samples, and 
121 | # the feature settings dictionary we looked at above.
122 | y, sr = spdl.denoiser_run(model, s_n, feat_settings)
123 | 
124 | ##########################################################
125 | # How does the output sound?
126 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~
127 | ipd.Audio(y,rate=sr)
128 | 
129 | ##########################################################
130 | # How does the output look?
131 | # ~~~~~~~~~~~~~~~~~~~~~~~~~
132 | sp.plotsound(y, sr=sr, feature_type = 'signal')
133 | 
134 | ##########################################################
135 | # How do the features compare?
136 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
137 | 
138 | ##########################################################
139 | # STFT features of the noisy input speech:
140 | sp.plotsound(s_n, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
141 |                title = 'Noisy input: STFT features')
142 | 
143 | ##########################################################
144 | # STFT features of the output
145 | sp.plotsound(y, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
146 |                title = 'Denoiser Output: STFT features')
147 | 
148 | ##########################################################
149 | # STFT features of the clean version of the audio:
150 | sp.plotsound(s, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
151 |                title = 'Clean "target" audio: STFT features')
152 | 
153 | 
154 | ##########################################################
155 | # It's not perfect but for a pretty simple implementation, the noise is gone
156 | # and you can hear the person speaking. Pretty cool! 
157 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_signals_and_features.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =======================
  4 | Create and Plot Signals
  5 | =======================
  6 | 
  7 | Create and plot signals / noise; combine them at a specific SNR.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.dsp.generate_sound`, 
 10 | `soundpy.dsp.generate_noise` and `soundpy.dsp.add_backgroundsound`.
 11 | """
 12 | 
 13 | 
 14 | ###############################################################################################
 15 | #  
 16 | 
 17 | #####################################################################
 18 | # Let's import soundpy
 19 | import soundpy as sp
 20 | 
 21 | ###########################################################################
 22 | # Create a Signal
 23 | # ^^^^^^^^^^^^^^^
 24 | 
 25 | ########################################################################
 26 | # First let's set what sample rate we want to use
 27 | sr = 44100
 28 | 
 29 | 
 30 | #########################################################################
 31 | # Let's create a signal of 10 Hz 
 32 | sig1_hz = 10
 33 | sig1, sr = sp.generate_sound(freq=sig1_hz, amplitude = 0.4, sr=sr, dur_sec=1)
 34 | sp.plotsound(sig1, sr=sr, feature_type = 'signal',
 35 |                title = 'Signal: {} Hz'.format(sig1_hz))
 36 | 
 37 | 
 38 | #########################################################################
 39 | # Let's create a signal of 20 Hz
 40 | sig2_hz = 20 
 41 | sig2, sr = sp.generate_sound(freq=sig2_hz, amplitude= 0.4, sr=sr, dur_sec=1)
 42 | sp.plotsound(sig2, sr=sr, feature_type = 'signal',
 43 |                title = 'Signal: {} Hz'.format(sig2_hz))
 44 | 
 45 | ###########################################################################
 46 | # Combine Signals 
 47 | # ^^^^^^^^^^^^^^^
 48 | 
 49 | 
 50 | #########################################################################
 51 | # Add them together and see what they look like:
 52 | sig3 = sig1 + sig2
 53 | sp.plotsound(sig3, sr=sr, feature_type = 'signal', 
 54 |                title='Mixed Signals: {} Hz + {} Hz'.format(sig1_hz, sig2_hz))
 55 | 
 56 | 
 57 | ##########################################################################
 58 | # Generate Pseudo-Random Noise
 59 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 60 | 
 61 | 
 62 | #########################################################################
 63 | # Create noise to add to the signal:
 64 | noise = sp.generate_noise(len(sig3), amplitude=0.025, random_seed=40)
 65 | sp.plotsound(noise, sr=sr, feature_type = 'signal',
 66 |                title='Random Noise')
 67 | 
 68 | ###########################################################################
 69 | # Control SNR: Adding a Background Sound
 70 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 71 | 
 72 | #########################################################################
 73 | # Add noise at signal-to-noise ratio of 40
 74 | sig_noisy, snr = sp.dsp.add_backgroundsound(
 75 |     audio_main = sig3, 
 76 |     audio_background = noise, 
 77 |     sr = sr,
 78 |     snr = 40,
 79 |     clip_at_zero = False)
 80 | 
 81 | # keep energy between 1 and -1 
 82 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
 83 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 40 SNR')
 84 | 
 85 | #########################################################################
 86 | # Add noise at signal-to-noise ratio of 20
 87 | sig_noisy, snr = sp.dsp.add_backgroundsound(
 88 |     audio_main = sig3, 
 89 |     audio_background = noise,
 90 |     sr = sr,
 91 |     snr = 20)
 92 | # keep energy between 1 and -1 
 93 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
 94 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 20 SNR')
 95 | 
 96 | #########################################################################
 97 | # Add noise at signal-to-noise ratio of 10
 98 | sig_noisy, snr = sp.dsp.add_backgroundsound(
 99 |     audio_main = sig3, 
100 |     audio_background = noise,
101 |     sr = sr,
102 |     snr = 10)
103 | # keep energy between 1 and -1 
104 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
105 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 10 SNR')
106 | 
107 | #########################################################################
108 | # Add noise at signal-to-noise ratio of 0
109 | sig_noisy, snr = sp.dsp.add_backgroundsound(
110 |     audio_main = sig3,
111 |     audio_background = noise,
112 |     sr = sr,
113 |     snr = 0)
114 | # keep energy between 1 and -1 
115 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
116 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 0 SNR')
117 | 
118 | 
119 | #########################################################################
120 | # Add noise at signal-to-noise ratio of -10
121 | sig_noisy, snr = sp.dsp.add_backgroundsound(
122 |     audio_main = sig3, 
123 |     audio_background = noise,
124 |     sr = sr,
125 |     snr = -10)
126 | # keep energy between 1 and -1 
127 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
128 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: -10 SNR')
129 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_train_classifier.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | ============================
  4 | Train an Acoustic Classifier
  5 | ============================
  6 | 
  7 | Train an acoustic classifier on speech or noise features.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_train`.
 10 | """
 11 | 
 12 | ###############################################################################################
 13 | #
 14 | 
 15 | #####################################################################
 16 | # Let's import soundpy for handling sound
 17 | import soundpy as sp
 18 | #####################################################################
 19 | # As well as the deep learning component of soundpy
 20 | from soundpy import models as spdl
 21 | 
 22 | 
 23 | ######################################################
 24 | # Prepare for Training: Data Organization
 25 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 26 | 
 27 | ##########################################################
 28 | # Set path relevant for audio data for this example
 29 | sp_dir = '../../../'
 30 | 
 31 | ######################################################
 32 | # I will load previously extracted features (from the Speech Commands Dataset) 
 33 | # See `soundpy.feats.save_features_datasets` or `soundpy.builtin.envclassifier_feats`
 34 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\
 35 |     'envclassifier/example_feats_fbank/'
 36 | 
 37 | #########################################################
 38 | # What is in this folder?
 39 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)
 40 | files = list(feature_extraction_dir.glob('*.*'))
 41 | for f in files:
 42 |     print(f.name)
 43 |   
 44 | #########################################################
 45 | # The .npy files contain the features themselves, in train, validation, and
 46 | # test datasets:
 47 | files = list(feature_extraction_dir.glob('*.npy'))
 48 | for f in files:
 49 |     print(f.name)
 50 |   
 51 | #########################################################
 52 | # The .csv files contain information about how the features were extracted
 53 | files = list(feature_extraction_dir.glob('*.csv'))
 54 | for f in files:
 55 |     print(f.name)
 56 | 
 57 | #########################################################
 58 | # We'll have a look at which features were extracted and other settings:
 59 | feat_settings = sp.utils.load_dict(
 60 |     feature_extraction_dir.joinpath('log_extraction_settings.csv'))
 61 | for key, value in feat_settings.items():
 62 |     print(key, ' --> ', value)
 63 |     
 64 | #########################################################
 65 | # For more about these settings, see `soundpy.feats.save_features_datasets`.
 66 |     
 67 | #########################################################
 68 | # We'll have a look at the audio files that were assigned 
 69 | # to the train, val, and test datasets. 
 70 | audio_datasets = sp.utils.load_dict(
 71 |     feature_extraction_dir.joinpath('dataset_audiofiles.csv'))
 72 | count = 0
 73 | for key, value in audio_datasets.items():
 74 |     print(key, ' --> ', value)
 75 |     count += 1
 76 |     if count > 5:
 77 |         break
 78 | 
 79 | #############################################################
 80 | # Built-In Functionality: soundpy does everything for you
 81 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 82 | # For more about this function, see `soundpy.models.builtin.envclassifier_train`.
 83 | 
 84 | #############################################################
 85 | model_dir, history = spdl.envclassifier_train(
 86 |     feature_extraction_dir = feature_extraction_dir,
 87 |     epochs = 50,
 88 |     patience = 30)
 89 | 
 90 | #############################################################
 91 | # Where the model and logs are located:
 92 | model_dir
 93 | 
 94 | #############################################################
 95 | # Let's plot how the model performed (on this mini dataset)
 96 | import matplotlib.pyplot as plt
 97 | plt.clf()
 98 | plt.plot(history.history['accuracy'])
 99 | plt.plot(history.history['val_accuracy'])
100 | plt.title('model accuracy')
101 | plt.ylabel('accuracy')
102 | plt.xlabel('epoch')
103 | plt.legend(['train', 'val'], loc='upper right')
104 | plt.show()
105 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_train_denoiser.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =============================
  4 | Train a Denoising Autoencoder
  5 | =============================
  6 | 
  7 | Train a denoising autoencoder with clean and noisy acoustic features.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_train`, 
 10 | `soundpy.builtin.denoiser_feats` and `soundpy.builtin.create_denoise_data`.
 11 | """
 12 | 
 13 | 
 14 | ###############################################################################################
 15 | # 
 16 | 
 17 | 
 18 | #####################################################################
 19 | # Let's import soundpy for handling sound
 20 | import soundpy as sp
 21 | #####################################################################
 22 | # As well as the deep learning component of soundpy
 23 | from soundpy import models as spdl
 24 | 
 25 | 
 26 | ######################################################
 27 | # Prepare for Training: Data Organization
 28 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 29 | 
 30 | ##########################################################
 31 | # Designate path relevant for accessing audiodata
 32 | sp_dir = '../../../'
 33 | 
 34 | 
 35 | ######################################################
 36 | # I will load previously extracted features (sample data), see `soundpy.feats.save_features_datasets` or `soundpy.builtin.denoiser_feats`
 37 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\
 38 |     'denoiser/example_feats_fbank/'
 39 | 
 40 | #########################################################
 41 | # What is in this folder?
 42 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)
 43 | files = list(feature_extraction_dir.glob('*.*'))
 44 | for f in files:
 45 |     print(f.name)
 46 |   
 47 | #########################################################
 48 | # The .npy files contain the features themselves, in train, validation, and
 49 | # test datasets:
 50 | files = list(feature_extraction_dir.glob('*.npy'))
 51 | for f in files:
 52 |     print(f.name)
 53 |   
 54 | #########################################################
 55 | # The .csv files contain information about how the features were extracted
 56 | files = list(feature_extraction_dir.glob('*.csv'))
 57 | for f in files:
 58 |     print(f.name)
 59 | 
 60 | #########################################################
 61 | # We'll have a look at which features were extracted and other settings:
 62 | feat_settings = sp.utils.load_dict(
 63 |     feature_extraction_dir.joinpath('log_extraction_settings.csv'))
 64 | for key, value in feat_settings.items():
 65 |     print(key, ' ---> ', value)
 66 |     
 67 | #########################################################
 68 | # For more about these settings, see `soundpy.feats.save_features_datasets`.
 69 |     
 70 | #########################################################
 71 | # We'll have a look at the audio files that were assigned 
 72 | # to the train, val, and test datasets.
 73 | audio_datasets = sp.utils.load_dict(
 74 |     feature_extraction_dir.joinpath('audiofiles_datasets_clean.csv'))
 75 | count = 0
 76 | for key, value in audio_datasets.items():
 77 |     print(key, ' ---> ', value)
 78 |     count += 1
 79 |     if count > 5:
 80 |         break
 81 | 
 82 | #############################################################
 83 | # Built-In Functionality: soundpy does everything for you
 84 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 85 | # For more about this, see `soundpy.models.builtin.denoiser_train`.
 86 | 
 87 | #############################################################
 88 | model_dir, history = spdl.denoiser_train(
 89 |     feature_extraction_dir = feature_extraction_dir,
 90 |     epochs = 50)
 91 | 
 92 | #########################################################
 93 | 
 94 | 
 95 | #############################################################
 96 | # Where the model and logs are located:
 97 | model_dir
 98 | 
 99 | 
100 | #############################################################
101 | # Let's plot how the model performed (on this mini dataset)
102 | 
103 | import matplotlib.pyplot as plt
104 | plt.plot(history.history['loss'])
105 | plt.plot(history.history['val_loss'])
106 | plt.title('model loss')
107 | plt.ylabel('loss')
108 | plt.xlabel('epoch')
109 | plt.legend(['train', 'val'], loc='upper right')
110 | plt.show()
111 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/exceptions.rst:
--------------------------------------------------------------------------------
1 | 
2 | Customized Errors
3 | -----------------
4 | 
5 | .. automodule:: soundpy.exceptions
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/feats.rst:
--------------------------------------------------------------------------------
1 | 
2 | Extract and manipulate audio features
3 | -------------------------------------
4 | 
5 | .. automodule:: soundpy.feats
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/files.rst:
--------------------------------------------------------------------------------
1 | 
2 | Working with audio files
3 | ------------------------
4 | 
5 | .. automodule:: soundpy.files
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/filters.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Filters: Wiener and Band Spectral Subtraction
 3 | ---------------------------------------------
 4 | 
 5 | .. automodule:: soundpy.filters
 6 |    :members:
 7 |    :undoc-members:
 8 |    :show-inheritance:
 9 | 
10 | .. autoclass:: soundpy.filters.FilterSettings
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 |    
15 |    .. automethod:: __init__
16 |    
17 | .. autoclass:: soundpy.filters.Filter
18 |    :members:
19 |    :undoc-members:
20 |    :show-inheritance:
21 |    
22 |    .. automethod:: __init__
23 |    
24 |    
25 | .. autoclass:: soundpy.filters.WienerFilter
26 |    :members:
27 |    :undoc-members:
28 |    :show-inheritance:
29 |    
30 |    .. automethod:: __init__
31 |    
32 |    
33 | .. autoclass:: soundpy.filters.BandSubtraction
34 |    :members:
35 |    :undoc-members:
36 |    :show-inheritance:
37 |    
38 |    .. automethod:: __init__
39 |    
40 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/index.rst:
--------------------------------------------------------------------------------
 1 | .. SoundPy documentation master file, created by
 2 |    sphinx-quickstart on Mon Jun 15 11:57:18 2020.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | SoundPy v0.1.0a2
 7 | ====================  
 8 | 
 9 | Welcome to the docs!
10 | --------------------
11 | 
12 | SoundPy is a research based Python package_ for exploring and experimenting with sound and deep learning. 
13 | 
14 | Those who might find this useful: 
15 | 
16 | * speech and sound enthusiasts
17 | * digital signal processing / mathematics / physics / acoustics enthusiasts
18 | * deep learning enthusiasts
19 | * researchers
20 | * linguists
21 | * psycholinguists
22 | 
23 | The main goal of SoundPy is to provide the code and functionality with more context via visualization, research, and mathematics. Most of the resources used to build the functionality stem from publicly available research and datasets.
24 | 
25 | As it covers quite a large range, from audio file conversion to implementation of trained neural networks, the purpose of SoundPy is not to be the perfect implementation of all functions (although that is also a goal :P ), but rather a peek into how they *can* be implemented, hopefully offering people a foundation for trying out different ways of implementation (feature extraction, building neural networks, etc.).
26 | 
27 | This project is still in the beginning stages and has a lot of room for growth, especially with contributors having a background / knowledge in data science, computer science, machine and deep learning, physics, acoustics, or dsp. Contributors from other backgrounds are also welcome! If you'd like SoundPy to do something it doesn't, try making it or create an issue.
28 | 
29 | .. _package: https://github.com/a-n-rose/Python-Sound-Tool/tree/master
30 | 
31 | .. toctree:: 
32 |    :maxdepth: 2
33 |    
34 |    example_cases.rst
35 |    readme.rst
36 | 
37 |    
38 | .. toctree:: 
39 |    :maxdepth: 1
40 |    
41 |    changelog.rst
42 |    
43 | * :ref:`genindex`
44 | * :ref:`modindex`
45 | * :ref:`search`
46 | 
47 | :Author:
48 |     Aislyn Rose 
49 |     
50 |     rose.aislyn.noelle@gmail.com
51 |     
52 |     webpage_
53 |     
54 |     github_
55 |     
56 | .. _webpage: https://a-n-rose.github.io/
57 |  
58 | .. _github : https://github.com/a-n-rose
59 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/model_dataprep.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Feeding large datasets to models
 3 | --------------------------------
 4 | 
 5 | .. autoclass:: soundpy.models.dataprep.Generator
 6 |    :members:
 7 |    :undoc-members:
 8 |    :show-inheritance:
 9 |    
10 |    .. automethod:: __init__
11 | 
12 |    
13 | .. automodule:: soundpy.models.dataprep
14 |    :members:
15 |    :undoc-members:
16 |    :show-inheritance:
17 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/modelsetup.rst:
--------------------------------------------------------------------------------
1 | 
2 | Additional model setup (e.g. Early Stopping)
3 | --------------------------------------------
4 | 
5 | .. automodule:: soundpy.models.modelsetup
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/modules.rst:
--------------------------------------------------------------------------------
 1 | =========================
 2 | SoundPy Functionality
 3 | =========================
 4 | 
 5 | .. include:: builtin_sp.rst
 6 | 
 7 | .. include:: builtin_spdl.rst
 8 | 
 9 | .. include:: augment.rst
10 | 
11 | .. include:: files.rst
12 | 
13 | .. include:: datasets.rst
14 | 
15 | .. include:: dsp.rst
16 | 
17 | .. include:: filters.rst
18 | 
19 | .. include:: feats.rst
20 | 
21 | .. include:: template_models.rst
22 | 
23 | .. include:: modelsetup.rst
24 | 
25 | .. include:: model_dataprep.rst
26 | 
27 | .. include:: utils.rst
28 | 
29 | .. include:: exceptions.rst
30 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/readme.rst:
--------------------------------------------------------------------------------
1 | .. include:: modules.rst
2 | 
3 | 
4 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/template_models.rst:
--------------------------------------------------------------------------------
1 | Template deep neural networks
2 | -----------------------------
3 | 
4 | .. automodule:: soundpy.models.template_models
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a2/utils.rst:
--------------------------------------------------------------------------------
1 | 
2 | Other useful non-specific functionality
3 | ---------------------------------------
4 | 
5 | .. automodule:: soundpy.utils
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/augment.rst:
--------------------------------------------------------------------------------
1 | 
2 | Augment audio data
3 | ------------------
4 | 
5 | .. automodule:: soundpy.augment
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/builtin_sp.rst:
--------------------------------------------------------------------------------
1 | 
2 | Built-In Functionality (non Deep Learning)
3 | ------------------------------------------
4 | 
5 | .. automodule:: soundpy.builtin
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/builtin_spdl.rst:
--------------------------------------------------------------------------------
1 | 
2 | Built-In Functionality (Deep Learning)
3 | --------------------------------------
4 | 
5 | .. automodule:: soundpy.models.builtin
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/changelog.rst:
--------------------------------------------------------------------------------
 1 | *********
 2 | Changelog
 3 | *********
 4 | 
 5 | v0.1.0a
 6 | =======
 7 | 
 8 | 
 9 | v0.1.0a3
10 | --------
11 | 2021-04-09
12 | 
13 | Bug fixes
14 |    -  no longer use Librosa for feature extraction: allow easier implementation of augmentations, especially during training. 
15 |    -  `soundpy.feats.plot` now uses parameter `subprocess` to allow for different backends to be applied, depending on when the function is called. For example, if plotting from within a Generator while training, `subprocess` should be set to True, and the 'Agg' backend will be applied. Otherwise, 'TkAgg' backend is used. Fixes issues with multi-threading.
16 |    -  Fixed generator and Tensorflow issue: with Tensorflow 2.2.0+ the models in `soundpy.models.builtin` that were trained via generator failed. Use `tensorflow.data.Dataset.from_generator` to feed generator data to models.
17 |    -  Improved `clip_at_zero`.
18 | 
19 | Features
20 |    -  Python 3.8 can now be used.
21 |    -  throw deprecation warning for parameters `context_window` or `frames_per_sample` as these "features" will be removed from feature extraction. Rather the features can be reshaped post feature extraction.
22 |    -  added `timestep`, `axis_timestep`, `context_window`, `axis_context_window`  and `combine_axes_0_1` parameters to  `soundpy.models.Generator`:  allow more control over shape of the features.
23 |    -  can run `soundpy.models.builtin.envclassifier_extract_train` to run with pre-extracted val and test features. 
24 |    -  `soundpy.feats.plotsound`, `soundpy.feats.plot_vad` and `soundpy.feats.plot_dom_freq` all can plot stereo sound: for each channel in a stereo signal, a plot is either generated or saved. If a filename already exists, a date stamp is added to filename to avoid overwriting images.
25 |    - allow `grayscale2color` to be applied to 2D data.
26 | 
27 | Breaking changes
28 |    -  `soundpy.models.Generator` uses parameter `normalize` instead of `normalized`. Found this to be more intuitive. If `normalize` is set to True, data will be normalized. Before, if `normalized` was set to True, data would not be normalized.
29 |    -  removed `add_tensor_last` and `add_tensor_first`: require adding of tensors (for keras) to be included in parameter `desired_input_shape`.
30 |    
31 | Other changes 
32 |    -  CPU soundpy can use Tensorflow 2.1.0, 2.2.0 and 2.3.0. Dockerfile still uses Tensorflow 2.1.0 as it is still compatible with updated code.
33 |    -  `soundpy.models.builtin.implement_denoiser` raises warning if cleaned features cannot be converted to raw audio samples.
34 | 
35 |    
36 | v0.1.0a2
37 | --------
38 | 2020-08-13
39 | 
40 | 
41 | Bug fixes
42 |    -  added `use_beg_ms` parameter in `soundpy.dsp.vad`: improved VAD recognition of silences post speech.
43 | 
44 | Features
45 |    -  added GPU option: provide instructions and Docker image for running SoundPy with GPU
46 |    -  added `extend_window_ms` parameter to `soundpy.feats.get_vad_samples` and `soundpy.feats.get_vad_stft`:  can extend VAD window if desired. Useful in higher SNR environments.
47 |    -  added `soundpy.feats.get_samples_clipped` and `soundpy.feats.get_stft_clipped` to clip off beginning and ending silences.
48 |    -  added `beg_end_clipped` parameter to `soundpy.feats.plot_vad` to visualize VAD by clipping the beginning and ending silences (if True) or VAD instances throughout the signal (if False).
49 |    -  added `soundpy.models.dataprep.GeneratorFeatExtraction` class for extracting and augmenting features during training (still experimental).
50 |    -  added `soundpy.models.builtin.envclassifier_extract_train` as an example of extracting and augmenting features during training (still experimental).
51 |    -  added `soundpy.dsp.clip_at_zero` to enable smoother concatenations of signals and enables removal of clicks at beginning and ending of signals.
52 |    -  added `soundpy.dsp.remove_dc_bias` to enable smoother concatenations of signals
53 |    -  added and set `remove_dc` parameter to True in `soundpy.files.loadsound` and `soundpy.files.savesound` to ensure signals all have mean zero.
54 |    -  added `mirror_sound` option to `soundpy.dsp.apply_sample_length` as a way to extend sound.
55 |    -  added `soundpy.dsp.ismono` to check if samples were mono or stereo.
56 |    -  added `soundpy.dsp.average_channels` to average sample amplitudes across channels, e.g. to identify where high energy begins / ends in the signal without disregarding additional channels (if stereo sound).
57 |    -  added `soundpy.dsp.add_channels` for adding additional channels if needed (e.g. for applying a 'hann' or 'hamming' window to stereo sound)
58 |    -  added stereo sound functionality to `soundpy.dsp.add_backgroundsound`, `soundpy.dsp.clip_at_zero`, `soundpy.dsp.calc_fft`, `soundpy.feats.get_stft`, `soundpy.feats.get_vad_stft` 
59 |    
60 | 
61 | Other changes
62 |    -  name change: from pysoundtool to soundpy: simpler
63 |    -  updated dependencies to newest versions still compatible with Tensorflow 2.1.0
64 |    -  moved `soundpy.dsp.get_vad_samples` to `soundpy.feats.get_vad_samples`
65 |    -  moved `soundpy.dsp.get_vad_stft` to `soundpy.feats.get_vad_stft`
66 |    -  name change: allow `soundpy.feats.normalize` to be used as `soundpy.normalize`
67 |    -  removed `pysoundtool_online` and mybinder button as maintaining the online version was not easily done. Aim to reimplement at some point.
68 |    
69 | 
70 | 
71 | v0.1.0a1
72 | ========
73 | 
74 | Initial public alpha release.
75 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/datasets.rst:
--------------------------------------------------------------------------------
1 | 
2 | Organizing datasets
3 | -------------------
4 | 
5 | .. automodule:: soundpy.datasets
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/dsp.rst:
--------------------------------------------------------------------------------
1 | 
2 | Working with signals
3 | --------------------
4 | 
5 | .. automodule:: soundpy.dsp
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/example_cases.rst:
--------------------------------------------------------------------------------
1 | 
2 | .. toctree::
3 |    :maxdepth: 2
4 | 
5 | .. include:: auto_examples/index.rst
6 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/README.txt:
--------------------------------------------------------------------------------
1 |  
2 | -----------------------------
3 | SoundPy Example Use Cases
4 | -----------------------------
5 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_dataset_info_formatting.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | """
 3 | ========================================
 4 | Audio Dataset Exploration and Formatting
 5 | ========================================
 6 | 
 7 | Examine audio files within a dataset, and reformat them if desired.  
 8 | 
 9 | To see how soundpy implements this, see `soundpy.builtin.dataset_logger` and 
10 | `soundpy.builtin.dataset_formatter`.
11 | """
12 | 
13 | #####################################################################
14 | # Let's import soundpy 
15 | import soundpy as sp
16 | 
17 | ###############################################################################################
18 | #  
19 | # Dataset Exploration
20 | # ^^^^^^^^^^^^^^^^^^^
21 | 
22 | ##########################################################
23 | # Designate path relevant for accessing audiodata
24 | sp_dir = '../../../'
25 | 
26 | ##########################################################
27 | # I will explore files in a small dataset on my computer with varying file formats.
28 | dataset_path = '{}audiodata2/'.format(sp_dir)
29 | dataset_info_dict = sp.builtin.dataset_logger('{}audiodata2/'.format(sp_dir));
30 | 
31 | #########################################################################
32 | # This returns our data in a dictionary, perfect for exploring via Pandas
33 | import pandas as pd
34 | all_data = pd.DataFrame(dataset_info_dict).T
35 | all_data.head()
36 | 
37 | ###################################
38 | # Let's have a look at the audio files and how uniform they are:
39 | print('formats: ', all_data.format_type.unique())
40 | print('bitdepth (types): ', all_data.bitdepth.unique())
41 | print('mean duration (sec): ', all_data.dur_sec.mean())
42 | print('std dev duration (sec): ', all_data.dur_sec.std())
43 | print('min sample rate: ', all_data.sr.min())
44 | print('max sample rate: ', all_data.sr.max())
45 | print('number of channels: ', all_data.num_channels.unique())
46 | 
47 | 
48 | ##########################################################
49 | # For a visual example, let's plot the count of various sample rates. (48000 Hz is high definition sound, 16000 Hz is wideband, and 8000 Hz is narrowband, similar to how speech sounds on the telephone.)
50 | all_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts')
51 | 
52 | ###############################################################################################
53 | # Reformat a Dataset
54 | # ^^^^^^^^^^^^^^^^^^
55 | 
56 | ##############################################################
57 | # Let's say we have a dataset that we want to make consistent. 
58 | # We can do that with soundpy
59 | new_dataset_dir = sp.builtin.dataset_formatter(
60 |     dataset_path, 
61 |     recursive = True, # we want all the audio, even in nested directories
62 |     format='WAV',
63 |     bitdepth = 16, # if set to None, a default bitdepth will be applied
64 |     sr = 16000, # wideband
65 |     mono = True, # ensure data all have 1 channel
66 |     dur_sec = 3, # audio will be limited to 3 seconds
67 |     zeropad = True, # audio shorter than 3 seconds will be zeropadded
68 |     new_dir = './example_dir/', # if None, a time-stamped directory will be created for you
69 |     overwrite = False # can set to True if you want to overwrite files
70 |     );
71 |         
72 | ###############################################
73 | # Let's see what the audio data looks like now:
74 | dataset_formatted_dict = sp.builtin.dataset_logger(new_dataset_dir, recursive=True);
75 | formatted_data = pd.DataFrame(dataset_formatted_dict).T
76 | 
77 | #####################
78 | formatted_data.head()
79 | 
80 | ###################################
81 | print('audio formats: ', formatted_data.format_type.unique())
82 | print('bitdepth (types): ', formatted_data.bitdepth.unique())
83 | print('mean duration (sec): ', formatted_data.dur_sec.mean())
84 | print('std dev duration (sec): ', formatted_data.dur_sec.std())
85 | print('min sample rate: ', formatted_data.sr.min())
86 | print('max sample rate: ', formatted_data.sr.max())
87 | print('number of channels: ', formatted_data.num_channels.unique())
88 | 
89 | ##########################################################
90 | # Now all the audio data is sampled at the same rate: 16000 Hz
91 | formatted_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts')
92 | 
93 | ###########################################
94 | # There we go! 
95 | # You can reformat only parts of the audio files, e.g. format or bitdepth.
96 | # If you leave parameters in sp.builtin.dataset_formatter as None, the original
97 | # settings of the audio file will be maintained (except for bitdepth: 
98 | # a default bitdepth will be applied according to the format of the file; see `soundfile.default_subtype`).
99 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_extract_augment_train_classifier.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | ==================================================
  4 | Extract, Augment, and Train an Acoustic Classifier
  5 | ==================================================
  6 | 
  7 | Extract and augment features as an acoustic classifier is trained on speech.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_extract_train`.
 10 | """
 11 | 
 12 | ###############################################################################################
 13 | #
 14 | 
 15 | import os, sys
 16 | import inspect
 17 | currentdir = os.path.dirname(os.path.abspath(
 18 |     inspect.getfile(inspect.currentframe())))
 19 | parentdir = os.path.dirname(currentdir)
 20 | parparentdir = os.path.dirname(parentdir)
 21 | packagedir = os.path.dirname(parparentdir)
 22 | sys.path.insert(0, packagedir)
 23 | 
 24 | import matplotlib.pyplot as plt
 25 | import IPython.display as ipd
 26 | package_dir = '../../../'
 27 | os.chdir(package_dir)
 28 | sp_dir = package_dir
 29 | 
 30 | 
 31 | #####################################################################
 32 | # Let's import soundpy for handling sound
 33 | import soundpy as sp
 34 | #####################################################################
 35 | # As well as the deep learning component of soundpy
 36 | from soundpy import models as spdl
 37 | 
 38 | 
 39 | ######################################################
 40 | # Prepare for Training: Data Organization
 41 | # =======================================
 42 | 
 43 | ######################################################
 44 | # I will use a sample speech commands data set:
 45 | 
 46 | ##########################################################
 47 | # Designate path relevant for accessing audiodata
 48 | data_dir = '{}../mini-audio-datasets/speech_commands/'.format(sp_dir)
 49 | 
 50 | 
 51 | ######################################################
 52 | # Setup a Feature Settings Dictionary
 53 | # -----------------------------------
 54 | 
 55 | 
 56 | feature_type = 'fbank'
 57 | num_filters = 40
 58 | rate_of_change = False
 59 | rate_of_acceleration = False
 60 | dur_sec = 1
 61 | win_size_ms = 25
 62 | percent_overlap = 0.5
 63 | sr = 22050
 64 | fft_bins = None
 65 | num_mfcc = None
 66 | real_signal = True
 67 | 
 68 | get_feats_kwargs = dict(feature_type = feature_type,
 69 |                         sr = sr,
 70 |                         dur_sec = dur_sec,
 71 |                         win_size_ms = win_size_ms,
 72 |                         percent_overlap = percent_overlap,
 73 |                         fft_bins = fft_bins,
 74 |                         num_filters = num_filters,
 75 |                         num_mfcc = num_mfcc,
 76 |                         rate_of_change = rate_of_change,
 77 |                         rate_of_acceleration = rate_of_acceleration,
 78 |                         real_signal = real_signal)
 79 | 
 80 | ######################################################
 81 | # Setup an Augmentation Dictionary
 82 | # --------------------------------
 83 | # This will apply augmentations at random at each epoch.
 84 | augmentation_all = dict([('add_white_noise',True),
 85 |                         ('speed_decrease', True),
 86 |                         ('speed_increase', True),
 87 |                         ('pitch_decrease', True),
 88 |                         ('pitch_increase', True),
 89 |                         ('harmonic_distortion', True),
 90 |                         ('vtlp', True)
 91 |                         ])
 92 | 
 93 | ##########################################################
 94 | # see the default values for these augmentations
 95 | augment_settings_dict = {}
 96 | for key in augmentation_all.keys():
 97 |     augment_settings_dict[key] = sp.augment.get_augmentation_settings_dict(key)
 98 | for key, value in augment_settings_dict.items():
 99 |     print(key, ' : ', value)
100 |     
101 | ##########################################################
102 | # Adjust Augmentation Defaults
103 | # ----------------------------
104 | 
105 | 
106 | ##########################################################
107 | # Adjust Add White Noise
108 | # ~~~~~~~~~~~~~~~~~~~~~~
109 | # I want the SNR of the white noise to vary between several values:
110 | # SNR 10, 15, and 20.
111 | augment_settings_dict['add_white_noise']['snr'] = [10,15,20]
112 | 
113 | ##########################################################
114 | # Adjust Pitch Decrease
115 | # ~~~~~~~~~~~~~~~~~~~~~
116 | # I found the pitch changes too exaggerated, so I will 
117 | # set those to 1 instead of 2 semitones.  
118 | augment_settings_dict['pitch_decrease']['num_semitones'] = 1 
119 | 
120 | ##########################################################
121 | # Adjust Pitch Increase
122 | # ~~~~~~~~~~~~~~~~~~~~~
123 | augment_settings_dict['pitch_increase']['num_semitones'] = 1 
124 | 
125 | ##########################################################
126 | # Adjust Speed Decrease
127 | # ~~~~~~~~~~~~~~~~~~~~~
128 | augment_settings_dict['speed_decrease']['perc'] = 0.1 
129 | 
130 | ##########################################################
131 | # Adjust Speed Increase
132 | # ~~~~~~~~~~~~~~~~~~~~~
133 | augment_settings_dict['speed_increase']['perc'] = 0.1 
134 | 
135 | 
136 | ######################################################
137 | # Update an Augmentation Dictionary
138 | # ---------------------------------
139 | # We'll include in the dictionary the settings we want for augmentations:
140 | augmentation_all.update(
141 |     dict(augment_settings_dict = augment_settings_dict))
142 | 
143 | 
144 | ######################################################
145 | # Train the Model
146 | # ===============
147 | # Note: disregard the warning:
148 | # WARNING: Only the power spectrum of the VTLP augmented signal can be returned due to resizing the augmentation from (56, 4401) to (79, 276)
149 | # 
150 | # This is due to the hyper frequency resolution applied to the audio during 
151 | # vocal-tract length perturbation, and then deresolution to bring to correct size.
152 | # The current implementation applies the deresolution to the power spectrum rather than
153 | # directly to the STFT. 
154 | model_dir, history = spdl.envclassifier_extract_train(
155 |     model_name = 'augment_builtin_speechcommands',
156 |     audiodata_path = data_dir,
157 |     augment_dict = augmentation_all,
158 |     labeled_data = True,
159 |     batch_size = 1,
160 |     epochs = 50, 
161 |     patience = 5,
162 |     visualize = True,
163 |     vis_every_n_items = 1,
164 |     **get_feats_kwargs)
165 | 
166 | #############################################################
167 | # Let's plot how the model performed (on this small dataset)
168 | plt.clf()
169 | plt.plot(history.history['accuracy'])
170 | plt.plot(history.history['val_accuracy'])
171 | plt.title('model accuracy')
172 | plt.ylabel('accuracy')
173 | plt.xlabel('epoch')
174 | plt.legend(['train', 'val'], loc='upper right')
175 | plt.savefig('accuracy.png')
176 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_featureprep_denoiser.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =======================================================
  4 | Feature Extraction for Denoising: Clean and Noisy Audio
  5 | =======================================================
  6 | 
  7 | Extract acoustic features from clean and noisy datasets for 
  8 | training a denoising model, e.g. a denoising autoencoder.
  9 | 
 10 | To see how soundpy implements this, see `soundpy.builtin.denoiser_feats`.
 11 | """
 12 | 
 13 | 
 14 | ###############################################################################################
 15 | # 
 16 | 
 17 | #####################################################################
 18 | import os, sys
 19 | import inspect
 20 | currentdir = os.path.dirname(os.path.abspath(
 21 |     inspect.getfile(inspect.currentframe())))
 22 | parentdir = os.path.dirname(currentdir)
 23 | parparentdir = os.path.dirname(parentdir)
 24 | packagedir = os.path.dirname(parparentdir)
 25 | sys.path.insert(0, packagedir)
 26 | 
 27 | import soundpy as sp 
 28 | import IPython.display as ipd
 29 | package_dir = '../../../'
 30 | os.chdir(package_dir)
 31 | sp_dir = package_dir
 32 | 
 33 | ######################################################
 34 | # Prepare for Extraction: Data Organization
 35 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 36 | 
 37 | ######################################################
 38 | # I will use a mini denoising dataset as an example
 39 | 
 40 | # Example noisy data:
 41 | data_noisy_dir = '{}../mini-audio-datasets/denoise/noisy'.format(sp_dir)
 42 | # Example clean data:
 43 | data_clean_dir = '{}../mini-audio-datasets/denoise/clean'.format(sp_dir)
 44 | # Where to save extracted features:
 45 | data_features_dir = './audiodata/example_feats_models/denoiser/'
 46 | 
 47 | ######################################################
 48 | # Choose Feature Type 
 49 | # ~~~~~~~~~~~~~~~~~~~
 50 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'.
 51 | # If you are working with speech, I suggest 'fbank', 'powspec', or 'stft'.
 52 | 
 53 | feature_type = 'stft'
 54 | sr = 22050
 55 | 
 56 | ######################################################
 57 | # Set Duration of Audio 
 58 | # ~~~~~~~~~~~~~~~~~~~~~
 59 | # How much audio in seconds used from each audio file.
 60 | # the speech samples are about 3 seconds long.
 61 | dur_sec = 3
 62 | 
 63 | #######################################################################
 64 | # Option 1: Built-In Functionality: soundpy does everything for you
 65 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 66 | 
 67 | ############################################################
 68 | # Define which data to use and which features to extract. 
 69 | # NOTE: because of the very small dataset, we will set
 70 | # `perc_train` to a lower level than 0.8. (Otherwise, an error will be raised.)
 71 | # Everything else is based on defaults. A feature folder with
 72 | # the feature data will be created in the current working directory.
 73 | # (Although, you can set this under the parameter `data_features_dir`)
 74 | # `visualize` saves periodic images of the features extracted.
 75 | # This is useful if you want to know what's going on during the process.
 76 | perc_train = 0.6 # with larger datasets this would be around 0.8
 77 | extraction_dir = sp.denoiser_feats(
 78 |     data_clean_dir = data_clean_dir, 
 79 |     data_noisy_dir = data_noisy_dir,
 80 |     sr = sr,
 81 |     feature_type = feature_type, 
 82 |     dur_sec = dur_sec,
 83 |     perc_train = perc_train,
 84 |     visualize=True);
 85 | extraction_dir
 86 | 
 87 | ################################################################
 88 | # The extracted features, extraction settings applied, and 
 89 | # which audio files were assigned to which datasets
 90 | # will be saved in the `extraction_dir` directory
 91 | 
 92 | 
 93 | ############################################################
 94 | # Logged Information
 95 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 96 | # Let's have a look at the files in the extraction_dir. The files ending 
 97 | # with .npy extension contain the feature data; the .csv files contain 
 98 | # logged information. 
 99 | featfiles = list(extraction_dir.glob('*.*'))
100 | for f in featfiles:
101 |     print(f.name)
102 |   
103 | ############################################################
104 | # Feature Settings
105 | # ~~~~~~~~~~~~~~~~~~
106 | # Since much was conducted behind the scenes, it's nice to know how the features
107 | # were extracted, for example, the sample rate and number of frequency bins applied, etc.
108 | feat_settings = sp.utils.load_dict(
109 |     extraction_dir.joinpath('log_extraction_settings.csv'))
110 | for key, value in feat_settings.items():
111 |     print(key, ' ---> ', value)
112 |     
113 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_featureprep_envclassifier.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =====================================
  4 | Feature Extraction for Classification
  5 | =====================================
  6 | 
  7 | Extract acoustic features from labeled data for 
  8 | training an environment or speech classifier.
  9 | 
 10 | To see how soundpy implements this, see `soundpy.builtin.envclassifier_feats`.
 11 | """
 12 | 
 13 | 
 14 | ###############################################################################################
 15 | # 
 16 | 
 17 | 
 18 | #####################################################################
 19 | import os, sys
 20 | import inspect
 21 | currentdir = os.path.dirname(os.path.abspath(
 22 |     inspect.getfile(inspect.currentframe())))
 23 | parentdir = os.path.dirname(currentdir)
 24 | parparentdir = os.path.dirname(parentdir)
 25 | packagedir = os.path.dirname(parparentdir)
 26 | sys.path.insert(0, packagedir)
 27 | 
 28 | import soundpy as sp 
 29 | import IPython.display as ipd
 30 | package_dir = '../../../'
 31 | os.chdir(package_dir)
 32 | sp_dir = package_dir
 33 | 
 34 | ######################################################
 35 | # Prepare for Extraction: Data Organization
 36 | # -----------------------------------------
 37 | 
 38 | ######################################################
 39 | # I will use a sample speech commands data set:
 40 | 
 41 | ##########################################################
 42 | # Designate path relevant for accessing audiodata
 43 | data_dir = '{}../mini-audio-datasets/speech_commands/'.format(sp_dir)
 44 | 
 45 | ######################################################
 46 | # Choose Feature Type 
 47 | # ~~~~~~~~~~~~~~~~~~~
 48 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'.
 49 | # If you are working with speech, I suggest 'fbank', 'powspec', or 'stft'.
 50 | 
 51 | feature_type = 'fbank'
 52 | 
 53 | ######################################################
 54 | # Set Duration of Audio 
 55 | # ~~~~~~~~~~~~~~~~~~~~~
 56 | # How much audio in seconds used from each audio file.
 57 | # The example noise and speech files are only 1 second long
 58 | dur_sec = 1
 59 | 
 60 | 
 61 | #############################################################
 62 | # Built-In Functionality - soundpy extracts the features for you
 63 | # ---------------------------------------------------------------
 64 | 
 65 | ############################################################
 66 | # Define which data to use and which features to extract
 67 | # Everything else is based on defaults. A feature folder with
 68 | # the feature data will be created in the current working directory.
 69 | # (Although, you can set this under the parameter `data_features_dir`)
 70 | # `visualize` saves periodic images of the features extracted.
 71 | # This is useful if you want to know what's going on during the process.
 72 | extraction_dir = sp.envclassifier_feats(data_dir, 
 73 |                                           feature_type=feature_type, 
 74 |                                           dur_sec=dur_sec,
 75 |                                           visualize=True);
 76 | 
 77 | ################################################################
 78 | # The extracted features, extraction settings applied, and 
 79 | # which audio files were assigned to which datasets
 80 | # will be saved in the following directory:
 81 | extraction_dir
 82 | 
 83 | ############################################################
 84 | # Logged Information
 85 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 86 | # Let's have a look at the files in the extraction_dir. The files ending 
 87 | # with .npy extension contain the feature data; the .csv files contain 
 88 | # logged information. 
 89 | featfiles = list(extraction_dir.glob('*.*'))
 90 | for f in featfiles:
 91 |     print(f.name)
 92 |   
 93 | ############################################################
 94 | # Feature Settings
 95 | # ~~~~~~~~~~~~~~~~~~
 96 | # Since much was conducted behind the scenes, it's nice to know how the features
 97 | # were extracted, for example, the sample rate and number of frequency bins applied, etc.
 98 | feat_settings = sp.utils.load_dict(
 99 |     extraction_dir.joinpath('log_extraction_settings.csv'))
100 | for key, value in feat_settings.items():
101 |     print(key, ' ---> ', value)
102 |     
103 |     
104 | ############################################################
105 | # Labeled Data
106 | # ~~~~~~~~~~~~~~~~~~
107 | # These are the labels and their encoded values:
108 | encode_dict = sp.utils.load_dict(
109 |     extraction_dir.joinpath('dict_encode.csv'))
110 | for key, value in encode_dict.items():
111 |     print(key, ' ---> ', value)
112 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_filter_out_noise.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # coding: utf-8
  3 | """
  4 | ===========================
  5 | Filter Out Background Noise
  6 | ===========================
  7 | 
  8 | Filter out background noise from noisy speech signals. 
  9 | 
 10 | To see how soundpy implements this, see `soundpy.builtin.filtersignal`.
 11 | 
 12 | As a general note for filtering, the Wiener Filter is the default filter for soundpy. It seems to filter signals more consistently than the Band Spectral Subtraction Filter.
 13 | """
 14 | 
 15 | 
 16 | ###############################################################################################
 17 | # 
 18 | 
 19 | 
 20 | #####################################################################
 21 | 
 22 | # Let's import soundpy, and ipd for playing audio data
 23 | import soundpy as sp
 24 | import IPython.display as ipd
 25 | 
 26 | 
 27 | ######################################################
 28 | # Define the noisy and clean speech audio files.
 29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 30 | # Note: these files are available in the soundpy repo.
 31 | # Designate path relevant for accessing audiodata
 32 | sp_dir = '../../../'
 33 | 
 34 | ##########################################################
 35 | # Noise sample:
 36 | noise = '{}audiodata/background_samples/traffic.wav'.format(sp_dir)
 37 | noise = sp.string2pathlib(noise)
 38 | speech = '{}audiodata/python.wav'.format(sp_dir)
 39 | speech = sp.utils.string2pathlib(speech)
 40 | 
 41 | ##########################################################
 42 | # For filtering, we will set the sample rate to be quite high:
 43 | sr = 48000
 44 | 
 45 | ##########################################################
 46 | # Create noisy speech signal at SNR 10
 47 | noisy, snr_measured = sp.dsp.add_backgroundsound(
 48 |     speech, 
 49 |     noise, 
 50 |     sr = sr, 
 51 |     snr = 10, 
 52 |     total_len_sec = 2, 
 53 |     pad_mainsound_sec = 0.5)
 54 | 
 55 | ##########################################################
 56 | # Hear and see the noisy speech 
 57 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 58 | 
 59 | ipd.Audio(noisy,rate=sr)
 60 | 
 61 | ##########################################################
 62 | sp.plotsound(noisy, sr=sr, feature_type='signal', 
 63 |                title = 'Noisy Speech', subprocess=True)
 64 | 
 65 | 
 66 | ##########################################################
 67 | # Hear and see the clean speech 
 68 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 69 | s, sr = sp.loadsound(speech, sr=sr)
 70 | ipd.Audio(s,rate=sr)
 71 | 
 72 | ##########################################################
 73 | sp.plotsound(s, sr=sr, feature_type='signal', 
 74 |                title = 'Clean Speech', subprocess=True)
 75 | 
 76 | 
 77 | ##########################################################
 78 | # Filter the noisy speech
 79 | # ^^^^^^^^^^^^^^^^^^^^^^^
 80 | 
 81 | ##########################################################
 82 | # Wiener Filter 
 83 | # ~~~~~~~~~~~~~
 84 | 
 85 | ##########################################################
 86 | # Let's filter with a Wiener filter:
 87 | noisy_wf, sr = sp.filtersignal(noisy,
 88 |                                  sr = sr,
 89 |                                  filter_type = 'wiener') # default
 90 | 
 91 | ##########################################################
 92 | ipd.Audio(noisy_wf,rate=sr)
 93 | 
 94 | ##########################################################
 95 | sp.plotsound(noisy_wf, sr = sr, feature_type = 'signal', 
 96 |                title = 'Noisy Speech: Wiener Filter', 
 97 |                subprocess=True)
 98 | 
 99 | #################################################################
100 | # Wiener Filter with Postfilter
101 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
102 | 
103 | ##########################################################
104 | # Let's filter with a Wiener filter and postfilter
105 | noisy_wfpf, sr = sp.filtersignal(noisy,
106 |                                  sr = sr,
107 |                                  filter_type = 'wiener',
108 |                                  apply_postfilter = True) 
109 | 
110 | ##########################################################
111 | ipd.Audio(noisy_wfpf,rate=sr)
112 | 
113 | ##########################################################
114 | sp.plotsound(noisy_wfpf, sr=sr, feature_type = 'signal', 
115 |                title = 'Noisy Speech: Wiener Filter with Postfilter', 
116 |                subprocess=True)
117 | 
118 | #################################################################
119 | # Band Spectral Subtraction
120 | # ~~~~~~~~~~~~~~~~~~~~~~~~~
121 | 
122 | ##########################################################
123 | # Let's filter using band spectral subtraction
124 | noisy_bs, sr = sp.filtersignal(noisy,
125 |                                  sr = sr,
126 |                                  filter_type = 'bandspec') 
127 | 
128 | ##########################################################
129 | ipd.Audio(noisy_bs,rate=sr)
130 | 
131 | ##########################################################
132 | sp.plotsound(noisy_bs, sr = sr, feature_type = 'signal', 
133 |                title = 'Noisy Speech: Band Spectral Subtraction', 
134 |                subprocess=True)
135 | 
136 | 
137 | #################################################################
138 | # Band Spectral Subtraction with Postfilter
139 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
140 | 
141 | #########################################################################
142 | # Finally, let's filter using band spectral subtraction with a postfilter
143 | noisy_bspf, sr = sp.filtersignal(noisy,
144 |                                  sr = sr,
145 |                                  filter_type = 'bandspec', 
146 |                                  apply_postfilter = True) 
147 | 
148 | ##########################################################
149 | ipd.Audio(noisy_bspf,rate=sr)
150 | 
151 | ##########################################################
152 | sp.plotsound(noisy_bspf, sr = sr, feature_type = 'signal', 
153 |                title = 'Noisy Speech: Band Spectral Subtraction with Postfilter', 
154 |                subprocess=True)
155 | 
156 | 
157 | ##########################################################
158 | # Filter: increase the scale
159 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^
160 | 
161 | ##########################################################
162 | # Let's filter with a Wiener filter:
163 | filter_scale = 5
164 | noisy_wf, sr = sp.filtersignal(noisy,
165 |                                  sr=sr,
166 |                                  filter_type = 'wiener',
167 |                                  filter_scale = filter_scale)
168 | 
169 | ##########################################################
170 | # Wiener Filter
171 | # ~~~~~~~~~~~~~
172 | 
173 | ##########################################################
174 | ipd.Audio(noisy_wf,rate=sr)
175 | 
176 | ##########################################################
177 | sp.plotsound(noisy_wf, sr = sr, feature_type = 'signal', 
178 |                title = 'Noisy Speech: Wiener Filter Scale {}'.format(filter_scale), 
179 |                subprocess=True)
180 | 
181 | #################################################################
182 | # Wiener Filter with Postfilter
183 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
184 | 
185 | ##########################################################
186 | # Let's filter with a Wiener filter and postfilter
187 | noisy_wfpf, sr = sp.filtersignal(noisy,
188 |                                  sr = sr,
189 |                                  filter_type = 'wiener',
190 |                                  apply_postfilter = True,
191 |                                  filter_scale = filter_scale) 
192 | 
193 | ##########################################################
194 | ipd.Audio(noisy_wfpf,rate = sr)
195 | 
196 | ##########################################################
197 | sp.plotsound(noisy_wfpf, sr = sr, feature_type = 'signal', 
198 |                title = 'Noisy Speech: Wiener Filter with Postfilter Scale {}'.format(filter_scale),
199 |                subprocess=True)
200 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_implement_denoiser.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =================================
  4 | Implement a Denoising Autoencoder
  5 | =================================
  6 | 
  7 | Implement denoising autoencoder to denoise a noisy speech signal.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_run`.
 10 | """
 11 | 
 12 | 
 13 | ############################################################################################
 14 | # 
 15 | 
 16 | #####################################################################
 17 | # Let's import soundpy and other packages
 18 | import soundpy as sp
 19 | import numpy as np
 20 | # for playing audio in this notebook:
 21 | import IPython.display as ipd
 22 | 
 23 | #####################################################################
 24 | # As well as the deep learning component of soundpy
 25 | from soundpy import models as spdl
 26 | 
 27 | ######################################################
 28 | # Prepare for Implementation: Data Organization
 29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 30 | 
 31 | ##########################################################
 32 | # Set path relevant for audio data for this example
 33 | sp_dir = '../../../'
 34 | 
 35 | ######################################################
 36 | # Set model pathway
 37 | # ~~~~~~~~~~~~~~~~~
 38 | # Currently, this expects a model saved with weights, with a .h5 extension.
 39 | # (See `model` below)
 40 | 
 41 | ######################################################
 42 | # The soundpy repo offers a pre-trained denoiser, which we'll use.
 43 | model = '{}audiodata/models/'.format(sp_dir)+\
 44 |     'denoiser/example_denoiser_stft.h5'
 45 | # ensure is a pathlib.PosixPath object
 46 | print(model)
 47 | model = sp.utils.string2pathlib(model)
 48 | model_dir = model.parent
 49 | 
 50 | #########################################################
 51 | # What is in this folder?
 52 | files = list(model_dir.glob('*.*'))
 53 | for f in files:
 54 |     print(f.name)
 55 |   
 56 | ######################################################
 57 | # Provide dictionary with feature extraction settings
 58 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 59 | 
 60 | #########################################################
 61 | # If soundpy extracts features for you, a 'log_extraction_settings.csv' 
 62 | # file will be saved, which includes relevant feature settings for implementing 
 63 | # the model; see `soundpy.feats.save_features_datasets`
 64 | feat_settings = sp.utils.load_dict(
 65 |     model_dir.joinpath('log_extraction_settings.csv'))
 66 | for key, value in feat_settings.items():
 67 |     print(key, ' --> ', value)
 68 |     # change objects that were string to original format
 69 |     import ast
 70 |     try:
 71 |         feat_settings[key] = ast.literal_eval(value)
 72 |     except ValueError:
 73 |         pass
 74 |     except SyntaxError:
 75 |         pass
 76 | 
 77 | #########################################################
 78 | # For the purposes of plotting, let's use some of the settings defined:
 79 | feature_type = feat_settings['feature_type']
 80 | sr = feat_settings['sr']
 81 | 
 82 | ######################################################
 83 | # Provide new audio for the denoiser to denoise!
 84 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 85 | 
 86 | #########################################################
 87 | # We'll use sample speech from the soundpy repo:
 88 | speech = sp.string2pathlib('{}audiodata/python.wav'.format(sp_dir))
 89 | s, sr = sp.loadsound(speech, sr=sr)
 90 | 
 91 | #########################################################
 92 | # Let's add some white noise (10 SNR)
 93 | s_n = sp.augment.add_white_noise(s, sr=sr, snr=10)
 94 | 
 95 | ##############################################################
 96 | # What does the noisy audio sound like?
 97 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 98 | ipd.Audio(s_n,rate=sr)
 99 | 
100 | ##############################################################
101 | # What does the noisy audio look like?
102 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
103 | sp.plotsound(s_n, sr = sr, feature_type='signal', subprocess=True)
104 | 
105 | ##############################################################
106 | # What does the clean audio sound like?
107 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
108 | ipd.Audio(s,rate=sr)
109 | 
110 | ##############################################################
111 | # What does the clean audio look like?
112 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
113 | sp.plotsound(s, sr = sr, feature_type='signal', subprocess=True)
114 | 
115 | #########################################################################
116 | # Built-In Denoiser Functionality
117 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
118 | 
119 | ##############################################################
120 | # We just need to feed the model path, the noisy audio samples, and
121 | # the feature settings dictionary we looked at above.
122 | y, sr = spdl.denoiser_run(model, s_n, feat_settings)
123 | 
124 | ##########################################################
125 | # How does the output sound?
126 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~
127 | ipd.Audio(y,rate=sr)
128 | 
129 | ##########################################################
130 | # How does the output look?
131 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
132 | sp.plotsound(y, sr=sr, feature_type = feature_type, subprocess=True)
133 | 
134 | ##########################################################
135 | # How do the features compare?
136 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
137 | 
138 | ##########################################################
139 | # STFT features of the noisy input speech:
140 | sp.plotsound(s_n, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
141 |                title = 'Noisy input: STFT features', subprocess=True)
142 | 
143 | ##########################################################
144 | # STFT features of the output
145 | sp.plotsound(y, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
146 |                title = 'Denoiser Output: STFT features', subprocess=True)
147 | 
148 | ##########################################################
149 | # STFT features of the clean version of the audio:
150 | sp.plotsound(s, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
151 |                title = 'Clean "target" audio: STFT features', subprocess=True)
152 | 
153 | 
154 | ##########################################################
155 | # It's not perfect but for a pretty simple implementation, the noise is gone
156 | # and you can hear the person speaking. Pretty cool! 
157 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_signals_and_features.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =======================
  4 | Create and Plot Signals
  5 | =======================
  6 | 
  7 | Create and plot signals / noise; combine them at a specific SNR.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.dsp.generate_sound`, 
 10 | `soundpy.dsp.generate_noise` and `soundpy.dsp.add_backgroundsound`.
 11 | """
 12 | 
 13 | 
 14 | ###############################################################################################
 15 | #  
 16 | 
 17 | #####################################################################
 18 | # Let's import soundpy
 19 | import soundpy as sp
 20 | 
 21 | ###########################################################################
 22 | # Create a Signal
 23 | # ^^^^^^^^^^^^^^^
 24 | 
 25 | ########################################################################
 26 | # First let's set what sample rate we want to use
 27 | sr = 44100
 28 | 
 29 | 
 30 | #########################################################################
 31 | # Let's create a signal of 10 Hz 
 32 | sig1_hz = 10
 33 | sig1, sr = sp.generate_sound(freq=sig1_hz, amplitude = 0.4, sr=sr, dur_sec=1)
 34 | sp.plotsound(sig1, sr=sr, feature_type = 'signal',
 35 |                title = 'Signal: {} Hz'.format(sig1_hz), subprocess=True)
 36 | 
 37 | 
 38 | #########################################################################
 39 | # Let's create a signal of 20 Hz
 40 | sig2_hz = 20 
 41 | sig2, sr = sp.generate_sound(freq=sig2_hz, amplitude= 0.4, sr=sr, dur_sec=1)
 42 | sp.plotsound(sig2, sr=sr, feature_type = 'signal',
 43 |                title = 'Signal: {} Hz'.format(sig2_hz), subprocess=True)
 44 | 
 45 | ###########################################################################
 46 | # Combine Signals 
 47 | # ^^^^^^^^^^^^^^^
 48 | 
 49 | 
 50 | #########################################################################
 51 | # Add them together and see what they look like:
 52 | sig3 = sig1 + sig2
 53 | sp.plotsound(sig3, sr=sr, feature_type = 'signal', 
 54 |                title='Mixed Signals: {} Hz + {} Hz'.format(sig1_hz, sig2_hz), 
 55 |                subprocess=True)
 56 | 
 57 | 
 58 | ##########################################################################
 59 | # Generate Pseudo-Random Noise
 60 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 61 | 
 62 | 
 63 | #########################################################################
 64 | # Create noise to add to the signal:
 65 | noise = sp.generate_noise(len(sig3), amplitude=0.02, random_seed=40)
 66 | sp.plotsound(noise, sr=sr, feature_type = 'signal',
 67 |                title='Random Noise', subprocess=True)
 68 | 
 69 | ###########################################################################
 70 | # Control SNR: Adding a Background Sound
 71 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 72 | 
 73 | #########################################################################
 74 | # Add noise at signal-to-noise ratio of 40
 75 | sig_noisy, snr = sp.dsp.add_backgroundsound(
 76 |     audio_main = sig3, 
 77 |     audio_background = noise, 
 78 |     sr = sr,
 79 |     snr = 40,
 80 |     clip_at_zero = False)
 81 | 
 82 | # keep energy between 1 and -1 
 83 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
 84 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 40 SNR',
 85 |              subprocess=True)
 86 | 
 87 | #########################################################################
 88 | # Add noise at signal-to-noise ratio of 20
 89 | sig_noisy, snr = sp.dsp.add_backgroundsound(
 90 |     audio_main = sig3, 
 91 |     audio_background = noise,
 92 |     sr = sr,
 93 |     snr = 20)
 94 | # keep energy between 1 and -1 
 95 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
 96 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 20 SNR',
 97 |              subprocess=True)
 98 | 
 99 | #########################################################################
100 | # Add noise at signal-to-noise ratio of 10
101 | sig_noisy, snr = sp.dsp.add_backgroundsound(
102 |     audio_main = sig3, 
103 |     audio_background = noise,
104 |     sr = sr,
105 |     snr = 10)
106 | # keep energy between 1 and -1 
107 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
108 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 10 SNR',
109 |              subprocess=True)
110 | 
111 | #########################################################################
112 | # Add noise at signal-to-noise ratio of 0
113 | sig_noisy, snr = sp.dsp.add_backgroundsound(
114 |     audio_main = sig3,
115 |     audio_background = noise,
116 |     sr = sr,
117 |     snr = 0)
118 | # keep energy between 1 and -1 
119 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
120 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 0 SNR',
121 |              subprocess=True)
122 | 
123 | 
124 | #########################################################################
125 | # Add noise at signal-to-noise ratio of -10
126 | sig_noisy, snr = sp.dsp.add_backgroundsound(
127 |     audio_main = sig3, 
128 |     audio_background = noise,
129 |     sr = sr,
130 |     snr = -10)
131 | # keep energy between 1 and -1 
132 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
133 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: -10 SNR',
134 |              subprocess=True)
135 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_train_classifier.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | ============================
  4 | Train an Acoustic Classifier
  5 | ============================
  6 | 
  7 | Train an acoustic classifier on speech or noise features.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_train`.
 10 | """
 11 | 
 12 | ###############################################################################################
 13 | #
 14 | import os, sys
 15 | import inspect
 16 | currentdir = os.path.dirname(os.path.abspath(
 17 |     inspect.getfile(inspect.currentframe())))
 18 | parentdir = os.path.dirname(currentdir)
 19 | parparentdir = os.path.dirname(parentdir)
 20 | packagedir = os.path.dirname(parparentdir)
 21 | sys.path.insert(0, packagedir)
 22 | 
 23 | import matplotlib.pyplot as plt
 24 | import IPython.display as ipd
 25 | package_dir = '../../../'
 26 | os.chdir(package_dir)
 27 | sp_dir = package_dir
 28 | 
 29 | 
 30 | #####################################################################
 31 | # Let's import soundpy for handling sound
 32 | import soundpy as sp
 33 | #####################################################################
 34 | # As well as the deep learning component of soundpy
 35 | from soundpy import models as spdl
 36 | 
 37 | 
 38 | ######################################################
 39 | # Prepare for Training: Data Organization
 40 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 41 | 
 42 | ##########################################################
 43 | # Set path relevant for audio data for this example
 44 | 
 45 | ######################################################
 46 | # I will load previously extracted features (from the Speech Commands Dataset) 
 47 | # See `soundpy.feats.save_features_datasets` or `soundpy.builtin.envclassifier_feats`
 48 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\
 49 |     'envclassifier/example_feats_fbank/'
 50 | 
 51 | #########################################################
 52 | # What is in this folder?
 53 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)
 54 | files = list(feature_extraction_dir.glob('*.*'))
 55 | for f in files:
 56 |     print(f.name)
 57 |   
 58 | #########################################################
 59 | # The .npy files contain the features themselves, in train, validation, and
 60 | # test datasets:
 61 | files = list(feature_extraction_dir.glob('*.npy'))
 62 | for f in files:
 63 |     print(f.name)
 64 |   
 65 | #########################################################
 66 | # The .csv files contain information about how the features were extracted
 67 | files = list(feature_extraction_dir.glob('*.csv'))
 68 | for f in files:
 69 |     print(f.name)
 70 | 
 71 | #########################################################
 72 | # We'll have a look at which features were extracted and other settings:
 73 | feat_settings = sp.utils.load_dict(
 74 |     feature_extraction_dir.joinpath('log_extraction_settings.csv'))
 75 | for key, value in feat_settings.items():
 76 |     print(key, ' --> ', value)
 77 |     
 78 | #########################################################
 79 | # For more about these settings, see `soundpy.feats.save_features_datasets`.
 80 |     
 81 | #########################################################
 82 | # We'll have a look at the audio files that were assigned 
 83 | # to the train, val, and test datasets. 
 84 | audio_datasets = sp.utils.load_dict(
 85 |     feature_extraction_dir.joinpath('dataset_audiofiles.csv'))
 86 | count = 0
 87 | for key, value in audio_datasets.items():
 88 |     print(key, ' --> ', value)
 89 |     count += 1
 90 |     if count > 5:
 91 |         break
 92 | 
 93 | #############################################################
 94 | # Built-In Functionality: soundpy does everything for you
 95 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 96 | # For more about this function, see `soundpy.models.builtin.envclassifier_train`.
 97 | 
 98 | #############################################################
 99 | model_dir, history = spdl.envclassifier_train(
100 |     feature_extraction_dir = feature_extraction_dir,
101 |     epochs = 10,
102 |     patience = 5)
103 | 
104 | #############################################################
105 | # Where the model and logs are located:
106 | model_dir
107 | 
108 | #############################################################
109 | # Let's plot how the model performed (on this mini dataset)
110 | import matplotlib.pyplot as plt
111 | plt.clf()
112 | plt.plot(history.history['accuracy'])
113 | plt.plot(history.history['val_accuracy'])
114 | plt.title('model accuracy')
115 | plt.ylabel('accuracy')
116 | plt.xlabel('epoch')
117 | plt.legend(['train', 'val'], loc='upper right')
118 | plt.savefig('accuracy.png')
119 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_train_denoiser.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =============================
  4 | Train a Denoising Autoencoder
  5 | =============================
  6 | 
  7 | Train a denoising autoencoder with clean and noisy acoustic features.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_train`, 
 10 | `soundpy.builtin.denoiser_feats` and `soundpy.builtin.create_denoise_data`.
 11 | """
 12 | 
 13 | 
 14 | ###############################################################################################
 15 | # 
 16 | import os, sys
 17 | import inspect
 18 | currentdir = os.path.dirname(os.path.abspath(
 19 |     inspect.getfile(inspect.currentframe())))
 20 | parentdir = os.path.dirname(currentdir)
 21 | parparentdir = os.path.dirname(parentdir)
 22 | packagedir = os.path.dirname(parparentdir)
 23 | sys.path.insert(0, packagedir)
 24 | 
 25 | import matplotlib.pyplot as plt
 26 | import IPython.display as ipd
 27 | package_dir = '../../../'
 28 | os.chdir(package_dir)
 29 | sp_dir = package_dir
 30 | 
 31 | 
 32 | #####################################################################
 33 | # Let's import soundpy for handling sound
 34 | import soundpy as sp
 35 | #####################################################################
 36 | # As well as the deep learning component of soundpy
 37 | from soundpy import models as spdl
 38 | 
 39 | 
 40 | ######################################################
 41 | # Prepare for Training: Data Organization
 42 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 43 | 
 44 | ##########################################################
 45 | # Designate path relevant for accessing audiodata
 46 | 
 47 | 
 48 | ######################################################
 49 | # I will load previously extracted features (sample data), see `soundpy.feats.save_features_datasets` or `soundpy.builtin.denoiser_feats`
 50 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\
 51 |     'denoiser/example_feats_fbank/'
 52 | 
 53 | #########################################################
 54 | # What is in this folder?
 55 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)
 56 | files = list(feature_extraction_dir.glob('*.*'))
 57 | for f in files:
 58 |     print(f.name)
 59 |   
 60 | #########################################################
 61 | # The .npy files contain the features themselves, in train, validation, and
 62 | # test datasets:
 63 | files = list(feature_extraction_dir.glob('*.npy'))
 64 | for f in files:
 65 |     print(f.name)
 66 |   
 67 | #########################################################
 68 | # The .csv files contain information about how the features were extracted
 69 | files = list(feature_extraction_dir.glob('*.csv'))
 70 | for f in files:
 71 |     print(f.name)
 72 | 
 73 | #########################################################
 74 | # We'll have a look at which features were extracted and other settings:
 75 | feat_settings = sp.utils.load_dict(
 76 |     feature_extraction_dir.joinpath('log_extraction_settings.csv'))
 77 | for key, value in feat_settings.items():
 78 |     print(key, ' --> ', value)
 79 |     
 80 | #########################################################
 81 | # For more about these settings, see `soundpy.feats.save_features_datasets`.
 82 |     
 83 | #########################################################
 84 | # We'll have a look at the audio files that were assigned 
 85 | # to the train, val, and test datasets.
 86 | audio_datasets = sp.utils.load_dict(
 87 |     feature_extraction_dir.joinpath('audiofiles_datasets_clean.csv'))
 88 | count = 0
 89 | for key, value in audio_datasets.items():
 90 |     print(key, ' --> ', value)
 91 |     count += 1
 92 |     if count > 5:
 93 |         break
 94 | 
 95 | #############################################################
 96 | # Built-In Functionality: soundpy does everything for you
 97 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 98 | # For more about this, see `soundpy.models.builtin.denoiser_train`.
 99 | 
100 | #############################################################
101 | model_dir, history = spdl.denoiser_train(
102 |     feature_extraction_dir = feature_extraction_dir,
103 |     epochs = 10)
104 | 
105 | #########################################################
106 | 
107 | 
108 | #############################################################
109 | # Where the model and logs are located:
110 | model_dir
111 | 
112 | 
113 | #############################################################
114 | # Let's plot how the model performed (on this mini dataset)
115 | import matplotlib.pyplot as plt
116 | plt.plot(history.history['loss'])
117 | plt.plot(history.history['val_loss'])
118 | plt.title('model loss')
119 | plt.ylabel('loss')
120 | plt.xlabel('epoch')
121 | plt.legend(['train', 'val'], loc='upper right')
122 | plt.savefig('loss.png')
123 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/exceptions.rst:
--------------------------------------------------------------------------------
1 | 
2 | Customized Errors
3 | -----------------
4 | 
5 | .. automodule:: soundpy.exceptions
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/feats.rst:
--------------------------------------------------------------------------------
1 | 
2 | Extract and manipulate audio features
3 | -------------------------------------
4 | 
5 | .. automodule:: soundpy.feats
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/files.rst:
--------------------------------------------------------------------------------
1 | 
2 | Working with audio files
3 | ------------------------
4 | 
5 | .. automodule:: soundpy.files
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/filters.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Filters: Wiener and Band Spectral Subtraction
 3 | ---------------------------------------------
 4 | 
 5 | .. automodule:: soundpy.filters
 6 |    :members:
 7 |    :undoc-members:
 8 |    :show-inheritance:
 9 | 
10 | .. autoclass:: soundpy.filters.FilterSettings
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 |    
15 |    .. automethod:: __init__
16 |    
17 | .. autoclass:: soundpy.filters.Filter
18 |    :members:
19 |    :undoc-members:
20 |    :show-inheritance:
21 |    
22 |    .. automethod:: __init__
23 |    
24 |    
25 | .. autoclass:: soundpy.filters.WienerFilter
26 |    :members:
27 |    :undoc-members:
28 |    :show-inheritance:
29 |    
30 |    .. automethod:: __init__
31 |    
32 |    
33 | .. autoclass:: soundpy.filters.BandSubtraction
34 |    :members:
35 |    :undoc-members:
36 |    :show-inheritance:
37 |    
38 |    .. automethod:: __init__
39 |    
40 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/index.rst:
--------------------------------------------------------------------------------
 1 | .. SoundPy documentation master file, created by
 2 |    sphinx-quickstart on Mon Jun 15 11:57:18 2020.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | SoundPy v0.1.0a3
 7 | ================  
 8 | 
 9 | Welcome to the docs!
10 | --------------------
11 | 
12 | To access documentation for specific versions:
13 | 
14 | .. toctree::
15 |    :maxdepth: 1
16 | 
17 |    versions.rst
18 | 
19 | 
20 | About SoundPy
21 | -------------
22 |    
 23 | SoundPy is a research-based Python package_ for exploring and experimenting with sound and deep learning. NOTE: SoundPy is in the alpha stage of development; please forgive any bugs that pop up, and also feel free/encouraged to open an issue_. 
24 | 
25 | Those who might find this useful: 
26 | 
27 | * speech and sound enthusiasts
28 | * digital signal processing / mathematics / physics / acoustics enthusiasts
29 | * deep learning enthusiasts
30 | * researchers
31 | * linguists
32 | * psycholinguists
33 | 
 34 | The main goal of SoundPy is to provide the code and functionality with more context via visualization, research, and mathematics. Most of the resources used to build the functionality stem from publicly available research and datasets.
35 | 
 36 | As it covers quite a large range, from audio file conversion to implementation of trained neural networks, the purpose of SoundPy is not to be the perfect implementation of all functions (although that is also a goal :P ), but rather a peek into how they *can* be implemented, hopefully offering people a foundation for trying out different ways of implementation (feature extraction, building neural networks, etc.).
37 | 
 38 | This project is still in the beginning stages and has a lot of room for growth, especially with contributors having a background / knowledge in data science, computer science, machine and deep learning, physics, acoustics, or dsp. Contributors from other backgrounds are also welcome! If you'd like SoundPy to do something it doesn't, try making it or create an issue.
39 | 
40 | .. _PyPI: https://pypi.org/project/soundpy/
41 | 
42 | .. _package: https://github.com/a-n-rose/Python-Sound-Tool/tree/development
43 | 
44 | .. _issue: https://github.com/a-n-rose/Python-Sound-Tool/issues
45 | 
46 | .. toctree:: 
47 |    :maxdepth: 2
48 |    
49 |    example_cases.rst
50 |    readme.rst
51 | 
52 |    
53 | .. toctree:: 
54 |    :maxdepth: 1
55 |    
56 |    changelog.rst
57 |    
58 | * :ref:`genindex`
59 | * :ref:`modindex`
60 | * :ref:`search`
61 | 
62 | :Author:
63 |     Aislyn Rose 
64 |     
65 |     rose.aislyn.noelle@gmail.com
66 |     
67 |     webpage_
68 |     
69 |     github_
70 |     
71 | .. _webpage: https://a-n-rose.github.io/
72 |  
73 | .. _github : https://github.com/a-n-rose
74 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/model_dataprep.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Feeding large datasets to models
 3 | --------------------------------
 4 | 
 5 | .. autoclass:: soundpy.models.dataprep.Generator
 6 |    :members:
 7 |    :undoc-members:
 8 |    :show-inheritance:
 9 |    
10 |    .. automethod:: __init__
11 | 
12 |    
13 | .. automodule:: soundpy.models.dataprep
14 |    :members:
15 |    :undoc-members:
16 |    :show-inheritance:
17 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/modelsetup.rst:
--------------------------------------------------------------------------------
1 | 
2 | Additional model setup (e.g. Early Stopping)
3 | --------------------------------------------
4 | 
5 | .. automodule:: soundpy.models.modelsetup
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/modules.rst:
--------------------------------------------------------------------------------
 1 | ==============================
 2 | SoundPy Functionality v0.1.0a3
 3 | ==============================
 4 | 
 5 | .. include:: builtin_sp.rst
 6 | 
 7 | .. include:: builtin_spdl.rst
 8 | 
 9 | .. include:: augment.rst
10 | 
11 | .. include:: files.rst
12 | 
13 | .. include:: datasets.rst
14 | 
15 | .. include:: dsp.rst
16 | 
17 | .. include:: filters.rst
18 | 
19 | .. include:: feats.rst
20 | 
21 | .. include:: template_models.rst
22 | 
23 | .. include:: modelsetup.rst
24 | 
25 | .. include:: model_dataprep.rst
26 | 
27 | .. include:: utils.rst
28 | 
29 | .. include:: exceptions.rst
30 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/readme.rst:
--------------------------------------------------------------------------------
1 | .. include:: modules.rst
2 | 
3 | 
4 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/template_models.rst:
--------------------------------------------------------------------------------
1 | Template deep neural networks
2 | -----------------------------
3 | 
4 | .. automodule:: soundpy.models.template_models
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/utils.rst:
--------------------------------------------------------------------------------
1 | 
2 | Other useful non-specific functionality
3 | ---------------------------------------
4 | 
5 | .. automodule:: soundpy.utils
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/0.1.0a3/versions.rst:
--------------------------------------------------------------------------------
 1 | ******************************************
 2 | SoundPy Versions Available as PyPI Package
 3 | ******************************************
 4 | 
 5 | .. toctree::
 6 |     :maxdepth: 1
 7 | 
 8 |     0.1.0a2/index.rst
 9 | 
10 | 


--------------------------------------------------------------------------------
/docs/source/augment.rst:
--------------------------------------------------------------------------------
1 | 
2 | Augment audio data
3 | ------------------
4 | 
5 | .. automodule:: soundpy.augment
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/builtin_sp.rst:
--------------------------------------------------------------------------------
1 | 
2 | Built-In Functionality (non Deep Learning)
3 | ------------------------------------------
4 | 
5 | .. automodule:: soundpy.builtin
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/builtin_spdl.rst:
--------------------------------------------------------------------------------
1 | 
2 | Built-In Functionality (Deep Learning)
3 | --------------------------------------
4 | 
5 | .. automodule:: soundpy.models.builtin
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/changelog.rst:
--------------------------------------------------------------------------------
 1 | *********
 2 | Changelog
 3 | *********
 4 | 
 5 | v0.1.0a
 6 | =======
 7 | 
 8 | 
 9 | v0.1.0a3
10 | --------
11 | 2021-04-09
12 | 
13 | Bug fixes
14 |    -  no longer use Librosa for feature extraction: allow easier implementation of augmentations, especially during training. 
 15 |    -  `soundpy.feats.plot` now uses parameter `subprocess` to allow for different backends to be applied, depending on when the function is called. For example, if plotting from within a Generator while training, `subprocess` should be set to True, and the 'Agg' backend will be applied. Otherwise, 'TkAgg' backend is used. Fixes issues with multi-threading.
16 |    -  Fixed generator and Tensorflow issue: with Tensorflow 2.2.0+ the models in `soundpy.models.builtin` that were trained via generator failed. Use `tensorflow.data.Dataset.from_generator` to feed generator data to models.
17 |    -  Improved `clip_at_zero`.
18 | 
19 | Features
20 |    -  Python 3.8 can now be used.
 21 |    -  throw deprecation warning for parameters `context_window` or `frames_per_sample` as these "features" will be removed from feature extraction. Rather the features can be reshaped post feature extraction.
 22 |    -  added `timestep`, `axis_timestep`, `context_window`, `axis_context_window`  and `combine_axes_0_1` parameters to  `soundpy.models.Generator`:  allow more control over shape of the features.
23 |    -  can run `soundpy.models.builtin.envclassifier_extract_train` to run with pre-extracted val and test features. 
24 |    -  `soundpy.feats.plotsound`, `soundpy.feats.plot_vad` and `soundpy.feats.plot_dom_freq` all can plot stereo sound: for each channel in a stereo signal, a plot is either generated or saved. If a filename already exists, a date stamp is added to filename to avoid overwriting images.
25 |    - allow `grayscale2color` to be applied to 2D data.
26 | 
27 | Breaking changes
 28 |    -  `soundpy.models.Generator` uses parameter `normalize` instead of `normalized`. Found this to be more intuitive. If `normalize` is set to True, data will be normalized. Before, if `normalized` was set to True, data would not be normalized.
29 |    -  removed `add_tensor_last` and `add_tensor_first`: require adding of tensors (for keras) to be included in parameter `desired_input_shape`.
30 |    
31 | Other changes 
32 |    -  CPU soundpy can use Tensorflow 2.1.0, 2.2.0 and 2.3.0. Dockerfile still uses Tensorflow 2.1.0 as it is still compatible with updated code.
33 |    -  `soundpy.models.builtin.implement_denoiser` raises warning if cleaned features cannot be converted to raw audio samples.
34 | 
35 |    
36 | v0.1.0a2
37 | --------
38 | 2020-08-13
39 | 
40 | 
41 | Bug fixes
42 |    -  added `use_beg_ms` parameter in `soundpy.dsp.vad`: improved VAD recognition of silences post speech.
43 | 
44 | Features
45 |    -  added GPU option: provide instructions and Docker image for running SoundPy with GPU
 46 |    -  added `extend_window_ms` parameter to `soundpy.feats.get_vad_samples` and `soundpy.feats.get_vad_stft`:  can extend VAD window if desired. Useful in higher SNR environments.
47 |    -  added `soundpy.feats.get_samples_clipped` and `soundpy.feats.get_stft_clipped` to clip off beginning and ending silences.
48 |    -  added `beg_end_clipped` parameter to `soundpy.feats.plot_vad` to visualize VAD by clipping the beginning and ending silences (if True) or VAD instances throughout the signal (if False).
49 |    -  added `soundpy.models.dataprep.GeneratorFeatExtraction` class for extracting and augmenting features during training (still experimental).
50 |    -  added `soundpy.models.builtin.envclassifier_extract_train` as an example of extracting and augmenting features during training (still experimental).
51 |    -  added `soundpy.dsp.clip_at_zero` to enable smoother concatenations of signals and enables removal of clicks at beginning and ending of signals.
52 |    -  added `soundpy.dsp.remove_dc_bias` to enable smoother concatenations of signals
53 |    -  added and set `remove_dc` parameter to True in `soundpy.files.loadsound` and `soundpy.files.savesound` to ensure signals all have mean zero.
54 |    -  added `mirror_sound` option to `soundpy.dsp.apply_sample_length` as a way to extend sound.
55 |    -  added `soundpy.dsp.ismono` to check if samples were mono or stereo.
56 |    -  added `soundpy.dsp.average_channels` to average sample amplitudes across channels, e.g. to identify where high energy begins / ends in the signal without disregarding additional channels (if stereo sound).
57 |    -  added `soundpy.dsp.add_channels` for adding additional channels if needed (e.g. for applying a 'hann' or 'hamming' window to stereo sound)
58 |    -  added stereo sound functionality to `soundpy.dsp.add_backgroundsound`, `soundpy.dsp.clip_at_zero`, `soundpy.dsp.calc_fft`, `soundpy.feats.get_stft`, `soundpy.feats.get_vad_stft` 
59 |    
60 | 
61 | Other changes
62 |    -  name change: from pysoundtool to soundpy: simpler
63 |    -  updated dependencies to newest versions still compatible with Tensorflow 2.1.0
64 |    -  moved `soundpy.dsp.get_vad_samples` to `soundpy.feats.get_vad_samples`
65 |    -  moved `soundpy.dsp.get_vad_stft` to `soundpy.feats.get_vad_stft`
66 |    -  name change: allow `soundpy.feats.normalize` to be used as `soundpy.normalize`
67 |    -  removed `pysoundtool_online` and mybinder button as maintaining the online version was not easily done. Aim to reimplement at some point.
68 |    
69 | 
70 | 
71 | v0.1.0a1
72 | ========
73 | 
74 | Initial public alpha release.
75 | 


--------------------------------------------------------------------------------
/docs/source/datasets.rst:
--------------------------------------------------------------------------------
1 | 
2 | Organizing datasets
3 | -------------------
4 | 
5 | .. automodule:: soundpy.datasets
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/dsp.rst:
--------------------------------------------------------------------------------
1 | 
2 | Working with signals
3 | --------------------
4 | 
5 | .. automodule:: soundpy.dsp
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/example_cases.rst:
--------------------------------------------------------------------------------
1 | 
2 | .. toctree::
3 |    :maxdepth: 2
4 | 
5 | .. include:: auto_examples/index.rst
6 | 


--------------------------------------------------------------------------------
/docs/source/examples/README.txt:
--------------------------------------------------------------------------------
1 |  
2 | -----------------------------
3 | SoundPy Example Use Cases
4 | -----------------------------
5 | 


--------------------------------------------------------------------------------
/docs/source/examples/plot_dataset_info_formatting.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | """
 3 | ========================================
 4 | Audio Dataset Exploration and Formatting
 5 | ========================================
 6 | 
 7 | Examine audio files within a dataset, and reformat them if desired.  
 8 | 
 9 | To see how soundpy implements this, see `soundpy.builtin.dataset_logger` and 
10 | `soundpy.builtin.dataset_formatter`.
11 | """
12 | 
13 | #####################################################################
14 | # Let's import soundpy 
15 | import soundpy as sp
16 | 
17 | ###############################################################################################
18 | #  
19 | # Dataset Exploration
20 | # ^^^^^^^^^^^^^^^^^^^
21 | 
22 | ##########################################################
23 | # Designate path relevant for accessing audiodata
24 | sp_dir = '../../../'
25 | 
26 | ##########################################################
27 | # I will explore files in a small dataset on my computer with varying file formats.
28 | dataset_path = '{}audiodata2/'.format(sp_dir)
29 | dataset_info_dict = sp.builtin.dataset_logger('{}audiodata2/'.format(sp_dir));
30 | 
31 | #########################################################################
32 | # This returns our data in a dictionary, perfect for exploring via Pandas
33 | import pandas as pd
34 | all_data = pd.DataFrame(dataset_info_dict).T
35 | all_data.head()
36 | 
37 | ###################################
38 | # Let's have a look at the audio files and how uniform they are:
39 | print('formats: ', all_data.format_type.unique())
40 | print('bitdepth (types): ', all_data.bitdepth.unique())
41 | print('mean duration (sec): ', all_data.dur_sec.mean())
42 | print('std dev duration (sec): ', all_data.dur_sec.std())
43 | print('min sample rate: ', all_data.sr.min())
44 | print('max sample rate: ', all_data.sr.max())
45 | print('number of channels: ', all_data.num_channels.unique())
46 | 
47 | 
48 | ##########################################################
49 | # For a visual example, let's plot the count of various sample rates. (48000 Hz is high definition sound, 16000 Hz is wideband, and 8000 Hz is narrowband, similar to how speech sounds on the telephone.)
50 | all_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts')
51 | 
52 | ###############################################################################################
53 | # Reformat a Dataset
54 | # ^^^^^^^^^^^^^^^^^^
55 | 
56 | ##############################################################
57 | # Let's say we have a dataset that we want to make consistent. 
58 | # We can do that with soundpy
59 | new_dataset_dir = sp.builtin.dataset_formatter(
60 |     dataset_path, 
61 |     recursive = True, # we want all the audio, even in nested directories
62 |     format='WAV',
63 |     bitdepth = 16, # if set to None, a default bitdepth will be applied
64 |     sr = 16000, # wideband
65 |     mono = True, # ensure data all have 1 channel
66 |     dur_sec = 3, # audio will be limited to 3 seconds
67 |     zeropad = True, # audio shorter than 3 seconds will be zeropadded
68 |     new_dir = './example_dir/', # if None, a time-stamped directory will be created for you
69 |     overwrite = False # can set to True if you want to overwrite files
70 |     );
71 |         
72 | ###############################################
73 | # Let's see what the audio data looks like now:
74 | dataset_formatted_dict = sp.builtin.dataset_logger(new_dataset_dir, recursive=True);
75 | formatted_data = pd.DataFrame(dataset_formatted_dict).T
76 | 
77 | #####################
78 | formatted_data.head()
79 | 
80 | ###################################
81 | print('audio formats: ', formatted_data.format_type.unique())
82 | print('bitdepth (types): ', formatted_data.bitdepth.unique())
83 | print('mean duration (sec): ', formatted_data.dur_sec.mean())
84 | print('std dev duration (sec): ', formatted_data.dur_sec.std())
85 | print('min sample rate: ', formatted_data.sr.min())
86 | print('max sample rate: ', formatted_data.sr.max())
87 | print('number of channels: ', formatted_data.num_channels.unique())
88 | 
89 | ##########################################################
90 | # Now all the audio data is sampled at the same rate: 16000 Hz
91 | formatted_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts')
92 | 
93 | ###########################################
94 | # There we go! 
95 | # You can reformat only parts of the audio files, e.g. format or bitdepth.
96 | # If you leave parameters in sp.builtin.dataset_formatter as None, the original
97 | # settings of the audio file will be maintained, except for bitdepth: 
98 | # a default bitdepth will be applied according to the format of the file; see `soundfile.default_subtype`.
99 | 


--------------------------------------------------------------------------------
/docs/source/examples/plot_extract_augment_train_classifier.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | ==================================================
  4 | Extract, Augment, and Train an Acoustic Classifier
  5 | ==================================================
  6 | 
  7 | Extract and augment features as an acoustic classifier is trained on speech.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_extract_train`.
 10 | """
 11 | 
 12 | ###############################################################################################
 13 | #
 14 | 
 15 | import os, sys
 16 | import inspect
 17 | currentdir = os.path.dirname(os.path.abspath(
 18 |     inspect.getfile(inspect.currentframe())))
 19 | parentdir = os.path.dirname(currentdir)
 20 | parparentdir = os.path.dirname(parentdir)
 21 | packagedir = os.path.dirname(parparentdir)
 22 | sys.path.insert(0, packagedir)
 23 | 
 24 | import matplotlib.pyplot as plt
 25 | import IPython.display as ipd
 26 | package_dir = '../../../'
 27 | os.chdir(package_dir)
 28 | sp_dir = package_dir
 29 | 
 30 | 
 31 | #####################################################################
 32 | # Let's import soundpy for handling sound
 33 | import soundpy as sp
 34 | #####################################################################
 35 | # As well as the deep learning component of soundpy
 36 | from soundpy import models as spdl
 37 | 
 38 | 
 39 | ######################################################
 40 | # Prepare for Training: Data Organization
 41 | # =======================================
 42 | 
 43 | ######################################################
 44 | # I will use a sample speech commands data set:
 45 | 
 46 | ##########################################################
 47 | # Designate path relevant for accessing audiodata
 48 | data_dir = '{}../mini-audio-datasets/speech_commands/'.format(sp_dir)
 49 | 
 50 | 
 51 | ######################################################
 52 | # Setup a Feature Settings Dictionary
 53 | # -----------------------------------
 54 | 
 55 | 
 56 | feature_type = 'fbank'
 57 | num_filters = 40
 58 | rate_of_change = False
 59 | rate_of_acceleration = False
 60 | dur_sec = 1
 61 | win_size_ms = 25
 62 | percent_overlap = 0.5
 63 | sr = 22050
 64 | fft_bins = None
 65 | num_mfcc = None
 66 | real_signal = True
 67 | 
 68 | get_feats_kwargs = dict(feature_type = feature_type,
 69 |                         sr = sr,
 70 |                         dur_sec = dur_sec,
 71 |                         win_size_ms = win_size_ms,
 72 |                         percent_overlap = percent_overlap,
 73 |                         fft_bins = fft_bins,
 74 |                         num_filters = num_filters,
 75 |                         num_mfcc = num_mfcc,
 76 |                         rate_of_change = rate_of_change,
 77 |                         rate_of_acceleration = rate_of_acceleration,
 78 |                         real_signal = real_signal)
 79 | 
 80 | ######################################################
 81 | # Setup an Augmentation Dictionary
 82 | # --------------------------------
 83 | # This will apply augmentations at random at each epoch.
 84 | augmentation_all = dict([('add_white_noise',True),
 85 |                         ('speed_decrease', True),
 86 |                         ('speed_increase', True),
 87 |                         ('pitch_decrease', True),
 88 |                         ('pitch_increase', True),
 89 |                         ('harmonic_distortion', True),
 90 |                         ('vtlp', True)
 91 |                         ])
 92 | 
 93 | ##########################################################
 94 | # see the default values for these augmentations
 95 | augment_settings_dict = {}
 96 | for key in augmentation_all.keys():
 97 |     augment_settings_dict[key] = sp.augment.get_augmentation_settings_dict(key)
 98 | for key, value in augment_settings_dict.items():
 99 |     print(key, ' : ', value)
100 |     
101 | ##########################################################
102 | # Adjust Augmentation Defaults
103 | # ----------------------------
104 | 
105 | 
106 | ##########################################################
107 | # Adjust Add White Noise
108 | # ~~~~~~~~~~~~~~~~~~~~~~
109 | # I want the SNR of the white noise to vary between several values: 
110 | # SNR 10, 15, and 20. 
111 | augment_settings_dict['add_white_noise']['snr'] = [10,15,20]
112 | 
113 | ##########################################################
114 | # Adjust Pitch Decrease
115 | # ~~~~~~~~~~~~~~~~~~~~~
116 | # I found the pitch changes too exaggerated, so I will 
117 | # set those to 1 instead of 2 semitones.  
118 | augment_settings_dict['pitch_decrease']['num_semitones'] = 1 
119 | 
120 | ##########################################################
121 | # Adjust Pitch Increase
122 | # ~~~~~~~~~~~~~~~~~~~~~
123 | augment_settings_dict['pitch_increase']['num_semitones'] = 1 
124 | 
125 | ##########################################################
126 | # Adjust Speed Decrease
127 | # ~~~~~~~~~~~~~~~~~~~~~
128 | augment_settings_dict['speed_decrease']['perc'] = 0.1 
129 | 
130 | ##########################################################
131 | # Adjust Speed Increase
132 | # ~~~~~~~~~~~~~~~~~~~~~
133 | augment_settings_dict['speed_increase']['perc'] = 0.1 
134 | 
135 | 
136 | ######################################################
137 | # Update an Augmentation Dictionary
138 | # ---------------------------------
139 | # We'll include in the dictionary the settings we want for augmentations:
140 | augmentation_all.update(
141 |     dict(augment_settings_dict = augment_settings_dict))
142 | 
143 | 
144 | ######################################################
145 | # Train the Model
146 | # ===============
147 | # Note: disregard the warning:
148 | # WARNING: Only the power spectrum of the VTLP augmented signal can be returned due to resizing the augmentation from (56, 4401) to (79, 276)
149 | # 
150 | # This is due to the hyper frequency resolution applied to the audio during 
151 | # vocal-tract length perturbation, and then deresolution to bring to correct size.
152 | # The current implementation applies the deresolution to the power spectrum rather than
153 | # directly to the STFT. 
154 | model_dir, history = spdl.envclassifier_extract_train(
155 |     model_name = 'augment_builtin_speechcommands',
156 |     audiodata_path = data_dir,
157 |     augment_dict = augmentation_all,
158 |     labeled_data = True,
159 |     batch_size = 1,
160 |     epochs = 50, 
161 |     patience = 5,
162 |     visualize = True,
163 |     vis_every_n_items = 1,
164 |     **get_feats_kwargs)
165 | 
166 | #############################################################
167 | # Let's plot how the model performed (on this small dataset)
168 | plt.clf()
169 | plt.plot(history.history['accuracy'])
170 | plt.plot(history.history['val_accuracy'])
171 | plt.title('model accuracy')
172 | plt.ylabel('accuracy')
173 | plt.xlabel('epoch')
174 | plt.legend(['train', 'val'], loc='upper right')
175 | plt.savefig('accuracy.png')
176 | 


--------------------------------------------------------------------------------
/docs/source/examples/plot_featureprep_denoiser.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =======================================================
  4 | Feature Extraction for Denoising: Clean and Noisy Audio
  5 | =======================================================
  6 | 
  7 | Extract acoustic features from clean and noisy datasets for 
  8 | training a denoising model, e.g. a denoising autoencoder.
  9 | 
 10 | To see how soundpy implements this, see `soundpy.builtin.denoiser_feats`.
 11 | """
 12 | 
 13 | 
 14 | ###############################################################################################
 15 | # 
 16 | 
 17 | #####################################################################
 18 | import os, sys
 19 | import inspect
 20 | currentdir = os.path.dirname(os.path.abspath(
 21 |     inspect.getfile(inspect.currentframe())))
 22 | parentdir = os.path.dirname(currentdir)
 23 | parparentdir = os.path.dirname(parentdir)
 24 | packagedir = os.path.dirname(parparentdir)
 25 | sys.path.insert(0, packagedir)
 26 | 
 27 | import soundpy as sp 
 28 | import IPython.display as ipd
 29 | package_dir = '../../../'
 30 | os.chdir(package_dir)
 31 | sp_dir = package_dir
 32 | 
 33 | ######################################################
 34 | # Prepare for Extraction: Data Organization
 35 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 36 | 
 37 | ######################################################
 38 | # I will use a mini denoising dataset as an example
 39 | 
 40 | # Example noisy data:
 41 | data_noisy_dir = '{}../mini-audio-datasets/denoise/noisy'.format(sp_dir)
 42 | # Example clean data:
 43 | data_clean_dir = '{}../mini-audio-datasets/denoise/clean'.format(sp_dir)
 44 | # Where to save extracted features:
 45 | data_features_dir = './audiodata/example_feats_models/denoiser/'
 46 | 
 47 | ######################################################
 48 | # Choose Feature Type 
 49 | # ~~~~~~~~~~~~~~~~~~~
 50 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'.
 51 | # if you are working with speech, I suggest 'fbank', 'powspec', or 'stft'.
 52 | 
 53 | feature_type = 'stft'
 54 | sr = 22050
 55 | 
 56 | ######################################################
 57 | # Set Duration of Audio 
 58 | # ~~~~~~~~~~~~~~~~~~~~~
 59 | # How much audio (in seconds) is used from each audio file.
 60 | # The speech samples are about 3 seconds long.
 61 | dur_sec = 3
 62 | 
 63 | #######################################################################
 64 | # Option 1: Built-In Functionality: soundpy does everything for you
 65 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 66 | 
 67 | ############################################################
 68 | # Define which data to use and which features to extract. 
 69 | # NOTE: because of the very small dataset, we will set 
 70 | # `perc_train` to a lower level than 0.8. (Otherwise, an error will be raised.)
 71 | # Everything else is based on defaults. A feature folder with
 72 | # the feature data will be created in the current working directory.
 73 | # (Although, you can set this under the parameter `data_features_dir`)
 74 | # `visualize` saves periodic images of the features extracted.
 75 | # This is useful if you want to know what's going on during the process.
 76 | perc_train = 0.6 # with larger datasets this would be around 0.8
 77 | extraction_dir = sp.denoiser_feats(
 78 |     data_clean_dir = data_clean_dir, 
 79 |     data_noisy_dir = data_noisy_dir,
 80 |     sr = sr,
 81 |     feature_type = feature_type, 
 82 |     dur_sec = dur_sec,
 83 |     perc_train = perc_train,
 84 |     visualize=True);
 85 | extraction_dir
 86 | 
 87 | ################################################################
 88 | # The extracted features, extraction settings applied, and 
 89 | # which audio files were assigned to which datasets
 90 | # will be saved in the `extraction_dir` directory
 91 | 
 92 | 
 93 | ############################################################
 94 | # Logged Information
 95 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 96 | # Let's have a look at the files in the extraction_dir. The files ending 
 97 | # with .npy extension contain the feature data; the .csv files contain 
 98 | # logged information. 
 99 | featfiles = list(extraction_dir.glob('*.*'))
100 | for f in featfiles:
101 |     print(f.name)
102 |   
103 | ############################################################
104 | # Feature Settings
105 | # ~~~~~~~~~~~~~~~~~~
106 | # Since much was conducted behind the scenes, it's nice to know how the features
107 | # were extracted, for example, the sample rate and number of frequency bins applied, etc.
108 | feat_settings = sp.utils.load_dict(
109 |     extraction_dir.joinpath('log_extraction_settings.csv'))
110 | for key, value in feat_settings.items():
111 |     print(key, ' ---> ', value)
112 |     
113 | 


--------------------------------------------------------------------------------
/docs/source/examples/plot_featureprep_envclassifier.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =====================================
  4 | Feature Extraction for Classification
  5 | =====================================
  6 | 
  7 | Extract acoustic features from labeled data for 
  8 | training an environment or speech classifier.
  9 | 
 10 | To see how soundpy implements this, see `soundpy.builtin.envclassifier_feats`.
 11 | """
 12 | 
 13 | 
 14 | ###############################################################################################
 15 | # 
 16 | 
 17 | 
 18 | #####################################################################
 19 | import os, sys
 20 | import inspect
 21 | currentdir = os.path.dirname(os.path.abspath(
 22 |     inspect.getfile(inspect.currentframe())))
 23 | parentdir = os.path.dirname(currentdir)
 24 | parparentdir = os.path.dirname(parentdir)
 25 | packagedir = os.path.dirname(parparentdir)
 26 | sys.path.insert(0, packagedir)
 27 | 
 28 | import soundpy as sp 
 29 | import IPython.display as ipd
 30 | package_dir = '../../../'
 31 | os.chdir(package_dir)
 32 | sp_dir = package_dir
 33 | 
 34 | ######################################################
 35 | # Prepare for Extraction: Data Organization
 36 | # -----------------------------------------
 37 | 
 38 | ######################################################
 39 | # I will use a sample speech commands data set:
 40 | 
 41 | ##########################################################
 42 | # Designate path relevant for accessing audiodata
 43 | data_dir = '{}../mini-audio-datasets/speech_commands/'.format(sp_dir)
 44 | 
 45 | ######################################################
 46 | # Choose Feature Type 
 47 | # ~~~~~~~~~~~~~~~~~~~
 48 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'.
 49 | # if you are working with speech, I suggest 'fbank', 'powspec', or 'stft'.
 50 | 
 51 | feature_type = 'fbank'
 52 | 
 53 | ######################################################
 54 | # Set Duration of Audio 
 55 | # ~~~~~~~~~~~~~~~~~~~~~
 56 | # How much audio (in seconds) is used from each audio file.
 57 | # The example noise and speech files are only 1 second long
 58 | dur_sec = 1
 59 | 
 60 | 
 61 | #############################################################
 62 | # Built-In Functionality - soundpy extracts the features for you
 63 | # ---------------------------------------------------------------
 64 | 
 65 | ############################################################
 66 | # Define which data to use and which features to extract
 67 | # Everything else is based on defaults. A feature folder with
 68 | # the feature data will be created in the current working directory.
 69 | # (Although, you can set this under the parameter `data_features_dir`)
 70 | # `visualize` saves periodic images of the features extracted.
 71 | # This is useful if you want to know what's going on during the process.
 72 | extraction_dir = sp.envclassifier_feats(data_dir, 
 73 |                                           feature_type=feature_type, 
 74 |                                           dur_sec=dur_sec,
 75 |                                           visualize=True);
 76 | 
 77 | ################################################################
 78 | # The extracted features, extraction settings applied, and 
 79 | # which audio files were assigned to which datasets
 80 | # will be saved in the following directory:
 81 | extraction_dir
 82 | 
 83 | ############################################################
 84 | # Logged Information
 85 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 86 | # Let's have a look at the files in the extraction_dir. The files ending 
 87 | # with .npy extension contain the feature data; the .csv files contain 
 88 | # logged information. 
 89 | featfiles = list(extraction_dir.glob('*.*'))
 90 | for f in featfiles:
 91 |     print(f.name)
 92 |   
 93 | ############################################################
 94 | # Feature Settings
 95 | # ~~~~~~~~~~~~~~~~~~
 96 | # Since much was conducted behind the scenes, it's nice to know how the features
 97 | # were extracted, for example, the sample rate and number of frequency bins applied, etc.
 98 | feat_settings = sp.utils.load_dict(
 99 |     extraction_dir.joinpath('log_extraction_settings.csv'))
100 | for key, value in feat_settings.items():
101 |     print(key, ' ---> ', value)
102 |     
103 |     
104 | ############################################################
105 | # Labeled Data
106 | # ~~~~~~~~~~~~~~~~~~
107 | # These are the labels and their encoded values:
108 | encode_dict = sp.utils.load_dict(
109 |     extraction_dir.joinpath('dict_encode.csv'))
110 | for key, value in encode_dict.items():
111 |     print(key, ' ---> ', value)
112 | 


--------------------------------------------------------------------------------
/docs/source/examples/plot_filter_out_noise.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # coding: utf-8
  3 | """
  4 | ===========================
  5 | Filter Out Background Noise
  6 | ===========================
  7 | 
  8 | Filter out background noise from noisy speech signals. 
  9 | 
 10 | To see how soundpy implements this, see `soundpy.builtin.filtersignal`.
 11 | 
 12 | As a general note for filtering, the Wiener Filter is the default filter for soundpy. It seems to filter signals more consistently than the Band Spectral Subtraction Filter.
 13 | """
 14 | 
 15 | 
 16 | ###############################################################################################
 17 | # 
 18 | 
 19 | 
 20 | #####################################################################
 21 | 
 22 | # Let's import soundpy, and ipd for playing audio data
 23 | import soundpy as sp
 24 | import IPython.display as ipd
 25 | 
 26 | 
 27 | ######################################################
 28 | # Define the noisy and clean speech audio files.
 29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 30 | # Note: these files are available in the soundpy repo.
 31 | # Designate path relevant for accessing audiodata
 32 | sp_dir = '../../../'
 33 | 
 34 | ##########################################################
 35 | # Noise sample:
 36 | noise = '{}audiodata/background_samples/traffic.wav'.format(sp_dir)
 37 | noise = sp.string2pathlib(noise)
 38 | speech = '{}audiodata/python.wav'.format(sp_dir)
 39 | speech = sp.utils.string2pathlib(speech)
 40 | 
 41 | ##########################################################
 42 | # For filtering, we will set the sample rate to be quite high:
 43 | sr = 48000
 44 | 
 45 | ##########################################################
 46 | # Create noisy speech signal at SNR 10
 47 | noisy, snr_measured = sp.dsp.add_backgroundsound(
 48 |     speech, 
 49 |     noise, 
 50 |     sr = sr, 
 51 |     snr = 10, 
 52 |     total_len_sec = 2, 
 53 |     pad_mainsound_sec = 0.5)
 54 | 
 55 | ##########################################################
 56 | # Hear and see the noisy speech 
 57 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 58 | 
 59 | ipd.Audio(noisy,rate=sr)
 60 | 
 61 | ##########################################################
 62 | sp.plotsound(noisy, sr=sr, feature_type='signal', 
 63 |                title = 'Noisy Speech', subprocess=True)
 64 | 
 65 | 
 66 | ##########################################################
 67 | # Hear and see the clean speech 
 68 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 69 | s, sr = sp.loadsound(speech, sr=sr)
 70 | ipd.Audio(s,rate=sr)
 71 | 
 72 | ##########################################################
 73 | sp.plotsound(s, sr=sr, feature_type='signal', 
 74 |                title = 'Clean Speech', subprocess=True)
 75 | 
 76 | 
 77 | ##########################################################
 78 | # Filter the noisy speech
 79 | # ^^^^^^^^^^^^^^^^^^^^^^^
 80 | 
 81 | ##########################################################
 82 | # Wiener Filter 
 83 | # ~~~~~~~~~~~~~
 84 | 
 85 | ##########################################################
 86 | # Let's filter with a Wiener filter:
 87 | noisy_wf, sr = sp.filtersignal(noisy,
 88 |                                  sr = sr,
 89 |                                  filter_type = 'wiener') # default
 90 | 
 91 | ##########################################################
 92 | ipd.Audio(noisy_wf,rate=sr)
 93 | 
 94 | ##########################################################
 95 | sp.plotsound(noisy_wf, sr = sr, feature_type = 'signal', 
 96 |                title = 'Noisy Speech: Wiener Filter', 
 97 |                subprocess=True)
 98 | 
 99 | #################################################################
100 | # Wiener Filter with Postfilter
101 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
102 | 
103 | ##########################################################
104 | # Let's filter with a Wiener filter and postfilter
105 | noisy_wfpf, sr = sp.filtersignal(noisy,
106 |                                  sr = sr,
107 |                                  filter_type = 'wiener',
108 |                                  apply_postfilter = True) 
109 | 
110 | ##########################################################
111 | ipd.Audio(noisy_wfpf,rate=sr)
112 | 
113 | ##########################################################
114 | sp.plotsound(noisy_wfpf, sr=sr, feature_type = 'signal', 
115 |                title = 'Noisy Speech: Wiener Filter with Postfilter', 
116 |                subprocess=True)
117 | 
118 | #################################################################
119 | # Band Spectral Subtraction
120 | # ~~~~~~~~~~~~~~~~~~~~~~~~~
121 | 
122 | ##########################################################
123 | # Let's filter using band spectral subtraction
124 | noisy_bs, sr = sp.filtersignal(noisy,
125 |                                  sr = sr,
126 |                                  filter_type = 'bandspec') 
127 | 
128 | ##########################################################
129 | ipd.Audio(noisy_bs,rate=sr)
130 | 
131 | ##########################################################
132 | sp.plotsound(noisy_bs, sr = sr, feature_type = 'signal', 
133 |                title = 'Noisy Speech: Band Spectral Subtraction', 
134 |                subprocess=True)
135 | 
136 | 
137 | #################################################################
138 | # Band Spectral Subtraction with Postfilter
139 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
140 | 
141 | #########################################################################
142 | # Finally, let's filter using band spectral subtraction with a postfilter
143 | noisy_bspf, sr = sp.filtersignal(noisy,
144 |                                  sr = sr,
145 |                                  filter_type = 'bandspec', 
146 |                                  apply_postfilter = True) 
147 | 
148 | ##########################################################
149 | ipd.Audio(noisy_bspf,rate=sr)
150 | 
151 | ##########################################################
152 | sp.plotsound(noisy_bspf, sr = sr, feature_type = 'signal', 
153 |                title = 'Noisy Speech: Band Spectral Subtraction with Postfilter', 
154 |                subprocess=True)
155 | 
156 | 
157 | ##########################################################
158 | # Filter: increase the scale
159 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^
160 | 
161 | ##########################################################
162 | # Let's filter with a Wiener filter:
163 | filter_scale = 5
164 | noisy_wf, sr = sp.filtersignal(noisy,
165 |                                  sr=sr,
166 |                                  filter_type = 'wiener',
167 |                                  filter_scale = filter_scale)
168 | 
169 | ##########################################################
170 | # Wiener Filter
171 | # ~~~~~~~~~~~~~
172 | 
173 | ##########################################################
174 | ipd.Audio(noisy_wf,rate=sr)
175 | 
176 | ##########################################################
177 | sp.plotsound(noisy_wf, sr = sr, feature_type = 'signal', 
178 |                title = 'Noisy Speech: Wiener Filter Scale {}'.format(filter_scale), 
179 |                subprocess=True)
180 | 
181 | #################################################################
182 | # Wiener Filter with Postfilter
183 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
184 | 
185 | ##########################################################
186 | # Let's filter with a Wiener filter and postfilter
187 | noisy_wfpf, sr = sp.filtersignal(noisy,
188 |                                  sr = sr,
189 |                                  filter_type = 'wiener',
190 |                                  apply_postfilter = True,
191 |                                  filter_scale = filter_scale) 
192 | 
193 | ##########################################################
194 | ipd.Audio(noisy_wfpf,rate = sr)
195 | 
196 | ##########################################################
197 | sp.plotsound(noisy_wfpf, sr = sr, feature_type = 'signal', 
198 |                title = 'Noisy Speech: Wiener Filter with Postfilter Scale {}'.format(filter_scale),
199 |                subprocess=True)
200 | 


--------------------------------------------------------------------------------
/docs/source/examples/plot_implement_denoiser.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =================================
  4 | Implement a Denoising Autoencoder
  5 | =================================
  6 | 
  7 | Implement denoising autoencoder to denoise a noisy speech signal.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_run`.
 10 | """
 11 | 
 12 | 
 13 | ############################################################################################
 14 | # 
 15 | 
 16 | #####################################################################
 17 | # Let's import soundpy and other packages
 18 | import soundpy as sp
 19 | import numpy as np
 20 | # for playing audio in this notebook:
 21 | import IPython.display as ipd
 22 | 
 23 | #####################################################################
 24 | # As well as the deep learning component of soundpy
 25 | from soundpy import models as spdl
 26 | 
 27 | ######################################################
 28 | # Prepare for Implementation: Data Organization
 29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 30 | 
 31 | ##########################################################
 32 | # Set path relevant for audio data for this example
 33 | sp_dir = '../../../'
 34 | 
 35 | ######################################################
 36 | # Set model pathway
 37 | # ~~~~~~~~~~~~~~~~~
 38 | # Currently, this expects a model saved with weights, with a .h5 extension.
 39 | # (See `model` below)
 40 | 
 41 | ######################################################
 42 | # The soundpy repo offers a pre-trained denoiser, which we'll use.
 43 | model = '{}audiodata/models/'.format(sp_dir)+\
 44 |     'denoiser/example_denoiser_stft.h5'
 45 | # ensure is a pathlib.PosixPath object
 46 | print(model)
 47 | model = sp.utils.string2pathlib(model)
 48 | model_dir = model.parent
 49 | 
 50 | #########################################################
 51 | # What is in this folder?
 52 | files = list(model_dir.glob('*.*'))
 53 | for f in files:
 54 |     print(f.name)
 55 |   
 56 | ######################################################
 57 | # Provide dictionary with feature extraction settings
 58 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 59 | 
 60 | #########################################################
 61 | # If soundpy extracts features for you, a 'log_extraction_settings.csv' 
 62 | # file will be saved, which includes relevant feature settings for implementing 
 63 | # the model; see `soundpy.feats.save_features_datasets`
 64 | feat_settings = sp.utils.load_dict(
 65 |     model_dir.joinpath('log_extraction_settings.csv'))
 66 | for key, value in feat_settings.items():
 67 |     print(key, ' --> ', value)
 68 |     # change objects that were string to original format
 69 |     import ast
 70 |     try:
 71 |         feat_settings[key] = ast.literal_eval(value)
 72 |     except ValueError:
 73 |         pass
 74 |     except SyntaxError:
 75 |         pass
 76 | 
 77 | #########################################################
 78 | # For the purposes of plotting, let's use some of the settings defined:
 79 | feature_type = feat_settings['feature_type']
 80 | sr = feat_settings['sr']
 81 | 
 82 | ######################################################
 83 | # Provide new audio for the denoiser to denoise!
 84 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 85 | 
 86 | #########################################################
 87 | # We'll use sample speech from the soundpy repo:
 88 | speech = sp.string2pathlib('{}audiodata/python.wav'.format(sp_dir))
 89 | s, sr = sp.loadsound(speech, sr=sr)
 90 | 
 91 | #########################################################
 92 | # Let's add some white noise (10 SNR)
 93 | s_n = sp.augment.add_white_noise(s, sr=sr, snr=10)
 94 | 
 95 | ##############################################################
 96 | # What does the noisy audio sound like?
 97 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 98 | ipd.Audio(s_n,rate=sr)
 99 | 
100 | ##############################################################
101 | # What does the noisy audio look like?
102 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
103 | sp.plotsound(s_n, sr = sr, feature_type='signal', subprocess=True)
104 | 
105 | ##############################################################
106 | # What does the clean audio sound like?
107 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
108 | ipd.Audio(s,rate=sr)
109 | 
110 | ##############################################################
111 | # What does the clean audio look like?
112 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
113 | sp.plotsound(s, sr = sr, feature_type='signal', subprocess=True)
114 | 
115 | #########################################################################
116 | # Built-In Denoiser Functionality
117 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
118 | 
119 | ##############################################################
120 | # We just need to feed the model path, the noisy audio samples, and 
121 | # the feature settings dictionary we looked at above.
122 | y, sr = spdl.denoiser_run(model, s_n, feat_settings)
123 | 
124 | ##########################################################
125 | # How does the output sound?
126 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~
127 | ipd.Audio(y,rate=sr)
128 | 
129 | ##########################################################
130 | # How does the output look? 
131 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
132 | sp.plotsound(y, sr=sr, feature_type = feature_type, subprocess=True)
133 | 
134 | ##########################################################
135 | # How do the features compare?
136 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
137 | 
138 | ##########################################################
139 | # STFT features of the noisy input speech:
140 | sp.plotsound(s_n, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
141 |                title = 'Noisy input: STFT features', subprocess=True)
142 | 
143 | ##########################################################
144 | # STFT features of the output
145 | sp.plotsound(y, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
146 |                title = 'Denoiser Output: STFT features', subprocess=True)
147 | 
148 | ##########################################################
149 | # STFT features of the clean version of the audio:
150 | sp.plotsound(s, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
151 |                title = 'Clean "target" audio: STFT features', subprocess=True)
152 | 
153 | 
154 | ##########################################################
155 | # It's not perfect but for a pretty simple implementation, the noise is gone
156 | # and you can hear the person speaking. Pretty cool! 
157 | 


--------------------------------------------------------------------------------
/docs/source/examples/plot_signals_and_features.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =======================
  4 | Create and Plot Signals
  5 | =======================
  6 | 
  7 | Create and plot signals / noise; combine them at a specific SNR.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.dsp.generate_sound`, 
 10 | `soundpy.dsp.generate_noise` and `soundpy.dsp.add_backgroundsound`.
 11 | """
 12 | 
 13 | 
 14 | ###############################################################################################
 15 | #  
 16 | 
 17 | #####################################################################
 18 | # Let's import soundpy
 19 | import soundpy as sp
 20 | 
 21 | ###########################################################################
 22 | # Create a Signal
 23 | # ^^^^^^^^^^^^^^^
 24 | 
 25 | ########################################################################
 26 | # First let's set what sample rate we want to use
 27 | sr = 44100
 28 | 
 29 | 
 30 | #########################################################################
 31 | # Let's create a signal of 10 Hz 
 32 | sig1_hz = 10
 33 | sig1, sr = sp.generate_sound(freq=sig1_hz, amplitude = 0.4, sr=sr, dur_sec=1)
 34 | sp.plotsound(sig1, sr=sr, feature_type = 'signal',
 35 |                title = 'Signal: {} Hz'.format(sig1_hz), subprocess=True)
 36 | 
 37 | 
 38 | #########################################################################
 39 | # Let's create a signal of 20 Hz
 40 | sig2_hz = 20 
 41 | sig2, sr = sp.generate_sound(freq=sig2_hz, amplitude= 0.4, sr=sr, dur_sec=1)
 42 | sp.plotsound(sig2, sr=sr, feature_type = 'signal',
 43 |                title = 'Signal: {} Hz'.format(sig2_hz), subprocess=True)
 44 | 
 45 | ###########################################################################
 46 | # Combine Signals 
 47 | # ^^^^^^^^^^^^^^^
 48 | 
 49 | 
 50 | #########################################################################
 51 | # Add them together and see what they look like:
 52 | sig3 = sig1 + sig2
 53 | sp.plotsound(sig3, sr=sr, feature_type = 'signal', 
 54 |                title='Mixed Signals: {} Hz + {} Hz'.format(sig1_hz, sig2_hz), 
 55 |                subprocess=True)
 56 | 
 57 | 
 58 | ##########################################################################
 59 | # Generate Pseudo-Random Noise
 60 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 61 | 
 62 | 
 63 | #########################################################################
 64 | # Create noise to add to the signal:
 65 | noise = sp.generate_noise(len(sig3), amplitude=0.02, random_seed=40)
 66 | sp.plotsound(noise, sr=sr, feature_type = 'signal',
 67 |                title='Random Noise', subprocess=True)
 68 | 
 69 | ###########################################################################
 70 | # Control SNR: Adding a Background Sound
 71 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 72 | 
 73 | #########################################################################
 74 | # Add noise at signal-to-noise ratio of 40
 75 | sig_noisy, snr = sp.dsp.add_backgroundsound(
 76 |     audio_main = sig3, 
 77 |     audio_background = noise, 
 78 |     sr = sr,
 79 |     snr = 40,
 80 |     clip_at_zero = False)
 81 | 
 82 | # keep energy between 1 and -1 
 83 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
 84 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 40 SNR',
 85 |              subprocess=True)
 86 | 
 87 | #########################################################################
 88 | # Add noise at signal-to-noise ratio of 20
 89 | sig_noisy, snr = sp.dsp.add_backgroundsound(
 90 |     audio_main = sig3, 
 91 |     audio_background = noise,
 92 |     sr = sr,
 93 |     snr = 20)
 94 | # keep energy between 1 and -1 
 95 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
 96 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 20 SNR',
 97 |              subprocess=True)
 98 | 
 99 | #########################################################################
100 | # Add noise at signal-to-noise ratio of 10
101 | sig_noisy, snr = sp.dsp.add_backgroundsound(
102 |     audio_main = sig3, 
103 |     audio_background = noise,
104 |     sr = sr,
105 |     snr = 10)
106 | # keep energy between 1 and -1 
107 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
108 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 10 SNR',
109 |              subprocess=True)
110 | 
111 | #########################################################################
112 | # Add noise at signal-to-noise ratio of 0
113 | sig_noisy, snr = sp.dsp.add_backgroundsound(
114 |     audio_main = sig3,
115 |     audio_background = noise,
116 |     sr = sr,
117 |     snr = 0)
118 | # keep energy between 1 and -1 
119 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
120 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 0 SNR',
121 |              subprocess=True)
122 | 
123 | 
124 | #########################################################################
125 | # Add noise at signal-to-noise ratio of -10
126 | sig_noisy, snr = sp.dsp.add_backgroundsound(
127 |     audio_main = sig3, 
128 |     audio_background = noise,
129 |     sr = sr,
130 |     snr = -10)
131 | # keep energy between 1 and -1 
132 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
133 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: -10 SNR',
134 |              subprocess=True)
135 | 


--------------------------------------------------------------------------------
/docs/source/examples/plot_train_classifier.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | ============================
  4 | Train an Acoustic Classifier
  5 | ============================
  6 | 
  7 | Train an acoustic classifier on speech or noise features.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_train`.
 10 | """
 11 | 
 12 | ###############################################################################################
 13 | #
 14 | import os, sys
 15 | import inspect
 16 | currentdir = os.path.dirname(os.path.abspath(  # folder of the file this code runs from
 17 |     inspect.getfile(inspect.currentframe())))
 18 | parentdir = os.path.dirname(currentdir)  # one level up
 19 | parparentdir = os.path.dirname(parentdir)  # two levels up
 20 | packagedir = os.path.dirname(parparentdir)  # three levels up (absolute path)
 21 | sys.path.insert(0, packagedir)  # put it first on the import path, ahead of `import soundpy` below
 22 | 
 23 | import matplotlib.pyplot as plt
 24 | import IPython.display as ipd
 25 | package_dir = '../../../'  # NOTE(review): relative to the CWD, unlike the absolute `packagedir` above
 26 | os.chdir(package_dir)  # change the working directory to that location
 27 | sp_dir = package_dir  # prefix for the data paths built with .format(sp_dir) below
 28 | 
 29 | 
 30 | #####################################################################
 31 | # Let's import soundpy for handling sound
 32 | import soundpy as sp
 33 | #####################################################################
 34 | # As well as the deep learning component of soundpy
 35 | from soundpy import models as spdl
 36 | 
 37 | 
 38 | ######################################################
 39 | # Prepare for Training: Data Organization
 40 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 41 | 
 42 | ##########################################################
 43 | # Set path relevant for audio data for this example
 44 | 
 45 | ######################################################
 46 | # I will load previously extracted features (from the Speech Commands Dataset) 
 47 | # See `soundpy.feats.save_features_datasets` or `soundpy.builtin.envclassifier_feats`
 48 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\
 49 |     'envclassifier/example_feats_fbank/'
 50 | 
 51 | #########################################################
 52 | # What is in this folder?
 53 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)
 54 | files = list(feature_extraction_dir.glob('*.*'))
 55 | for f in files:
 56 |     print(f.name)
 57 |   
 58 | #########################################################
 59 | # The .npy files contain the features themselves, in train, validation, and
 60 | # test datasets:
 61 | files = list(feature_extraction_dir.glob('*.npy'))
 62 | for f in files:
 63 |     print(f.name)
 64 |   
 65 | #########################################################
 66 | # The .csv files contain information about how the features were extracted
 67 | files = list(feature_extraction_dir.glob('*.csv'))
 68 | for f in files:
 69 |     print(f.name)
 70 | 
 71 | #########################################################
 72 | # We'll have a look at which features were extracted and other settings:
 73 | feat_settings = sp.utils.load_dict(
 74 |     feature_extraction_dir.joinpath('log_extraction_settings.csv'))
 75 | for key, value in feat_settings.items():
 76 |     print(key, ' --> ', value)
 77 |     
 78 | #########################################################
 79 | # For more about these settings, see `soundpy.feats.save_features_datasets`.
 80 |     
 81 | #########################################################
 82 | # We'll have a look at the audio files that were assigned 
 83 | # to the train, val, and test datasets. 
 84 | audio_datasets = sp.utils.load_dict(
 85 |     feature_extraction_dir.joinpath('dataset_audiofiles.csv'))
 86 | count = 0
 87 | for key, value in audio_datasets.items():
 88 |     print(key, ' --> ', value)
 89 |     count += 1
 90 |     if count > 5:
 91 |         break
 92 | 
 93 | #############################################################
 94 | # Built-In Functionality: soundpy does everything for you
 95 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 96 | # For more about this function, see `soundpy.models.builtin.envclassifier_train`.
 97 | 
 98 | #############################################################
 99 | model_dir, history = spdl.envclassifier_train(
100 |     feature_extraction_dir = feature_extraction_dir,
101 |     epochs = 10,
102 |     patience = 5)
103 | 
104 | #############################################################
105 | # Where the model and logs are located:
106 | model_dir
107 | 
108 | #############################################################
109 | # Let's plot how the model performed (on this mini dataset)
110 | import matplotlib.pyplot as plt
111 | plt.clf()
112 | plt.plot(history.history['accuracy'])
113 | plt.plot(history.history['val_accuracy'])
114 | plt.title('model accuracy')
115 | plt.ylabel('accuracy')
116 | plt.xlabel('epoch')
117 | plt.legend(['train', 'val'], loc='upper right')
118 | plt.savefig('accuracy.png')
119 | 


--------------------------------------------------------------------------------
/docs/source/examples/plot_train_denoiser.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """
  3 | =============================
  4 | Train a Denoising Autoencoder
  5 | =============================
  6 | 
  7 | Train a denoising autoencoder with clean and noisy acoustic features.
  8 | 
  9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_train`, 
 10 | `soundpy.builtin.denoiser_feats` and `soundpy.builtin.create_denoise_data`.
 11 | """
 12 | 
 13 | 
 14 | ###############################################################################################
 15 | # 
 16 | import os, sys
 17 | import inspect
 18 | currentdir = os.path.dirname(os.path.abspath(  # folder of the file this code runs from
 19 |     inspect.getfile(inspect.currentframe())))
 20 | parentdir = os.path.dirname(currentdir)  # one level up
 21 | parparentdir = os.path.dirname(parentdir)  # two levels up
 22 | packagedir = os.path.dirname(parparentdir)  # three levels up (absolute path)
 23 | sys.path.insert(0, packagedir)  # put it first on the import path, ahead of `import soundpy` below
 24 | 
 25 | import matplotlib.pyplot as plt
 26 | import IPython.display as ipd
 27 | package_dir = '../../../'  # NOTE(review): relative to the CWD, unlike the absolute `packagedir` above
 28 | os.chdir(package_dir)  # change the working directory to that location
 29 | sp_dir = package_dir  # prefix for the data paths built with .format(sp_dir) below
 30 | 
 31 | 
 32 | #####################################################################
 33 | # Let's import soundpy for handling sound
 34 | import soundpy as sp
 35 | #####################################################################
 36 | # As well as the deep learning component of soundpy
 37 | from soundpy import models as spdl
 38 | 
 39 | 
 40 | ######################################################
 41 | # Prepare for Training: Data Organization
 42 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 43 | 
 44 | ##########################################################
 45 | # Designate path relevant for accessing audiodata
 46 | 
 47 | 
 48 | ######################################################
 49 | # I will load previously extracted features (sample data), see `soundpy.feats.save_features_datasets` or `soundpy.builtin.denoiser_feats`
 50 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\
 51 |     'denoiser/example_feats_fbank/'
 52 | 
 53 | #########################################################
 54 | # What is in this folder?
 55 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)
 56 | files = list(feature_extraction_dir.glob('*.*'))
 57 | for f in files:
 58 |     print(f.name)
 59 |   
 60 | #########################################################
 61 | # The .npy files contain the features themselves, in train, validation, and
 62 | # test datasets:
 63 | files = list(feature_extraction_dir.glob('*.npy'))
 64 | for f in files:
 65 |     print(f.name)
 66 |   
 67 | #########################################################
 68 | # The .csv files contain information about how the features were extracted
 69 | files = list(feature_extraction_dir.glob('*.csv'))
 70 | for f in files:
 71 |     print(f.name)
 72 | 
 73 | #########################################################
 74 | # We'll have a look at which features were extracted and other settings:
 75 | feat_settings = sp.utils.load_dict(
 76 |     feature_extraction_dir.joinpath('log_extraction_settings.csv'))
 77 | for key, value in feat_settings.items():
 78 |     print(key, ' --> ', value)
 79 |     
 80 | #########################################################
 81 | # For more about these settings, see `soundpy.feats.save_features_datasets`.
 82 |     
 83 | #########################################################
 84 | # We'll have a look at the audio files that were assigned 
 85 | # to the train, val, and test datasets.
 86 | audio_datasets = sp.utils.load_dict(
 87 |     feature_extraction_dir.joinpath('audiofiles_datasets_clean.csv'))
 88 | count = 0
 89 | for key, value in audio_datasets.items():
 90 |     print(key, ' --> ', value)
 91 |     count += 1
 92 |     if count > 5:
 93 |         break
 94 | 
 95 | #############################################################
 96 | # Built-In Functionality: soundpy does everything for you
 97 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 98 | # For more about this, see `soundpy.models.builtin.denoiser_train`.
 99 | 
100 | #############################################################
101 | model_dir, history = spdl.denoiser_train(
102 |     feature_extraction_dir = feature_extraction_dir,
103 |     epochs = 10)
104 | 
105 | #########################################################
106 | 
107 | 
108 | #############################################################
109 | # Where the model and logs are located:
110 | model_dir
111 | 
112 | 
113 | #############################################################
114 | # Let's plot how the model performed (on this mini dataset)
115 | import matplotlib.pyplot as plt
116 | plt.plot(history.history['loss'])
117 | plt.plot(history.history['val_loss'])
118 | plt.title('model loss')
119 | plt.ylabel('loss')
120 | plt.xlabel('epoch')
121 | plt.legend(['train', 'val'], loc='upper right')
122 | plt.savefig('loss.png')
123 | 


--------------------------------------------------------------------------------
/docs/source/exceptions.rst:
--------------------------------------------------------------------------------
1 | 
2 | Customized Errors
3 | -----------------
4 | 
5 | .. automodule:: soundpy.exceptions
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/feats.rst:
--------------------------------------------------------------------------------
1 | 
2 | Extract and manipulate audio features
3 | -------------------------------------
4 | 
5 | .. automodule:: soundpy.feats
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/files.rst:
--------------------------------------------------------------------------------
1 | 
2 | Working with audio files
3 | ------------------------
4 | 
5 | .. automodule:: soundpy.files
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/filters.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Filters: Wiener and Band Spectral Subtraction
 3 | ---------------------------------------------
 4 | 
 5 | .. automodule:: soundpy.filters
 6 |    :members:
 7 |    :undoc-members:
 8 |    :show-inheritance:
 9 | 
10 | .. autoclass:: soundpy.filters.FilterSettings
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 |    
15 |    .. automethod:: __init__
16 |    
17 | .. autoclass:: soundpy.filters.Filter
18 |    :members:
19 |    :undoc-members:
20 |    :show-inheritance:
21 |    
22 |    .. automethod:: __init__
23 |    
24 |    
25 | .. autoclass:: soundpy.filters.WienerFilter
26 |    :members:
27 |    :undoc-members:
28 |    :show-inheritance:
29 |    
30 |    .. automethod:: __init__
31 |    
32 |    
33 | .. autoclass:: soundpy.filters.BandSubtraction
34 |    :members:
35 |    :undoc-members:
36 |    :show-inheritance:
37 |    
38 |    .. automethod:: __init__
39 |    
40 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. SoundPy documentation master file, created by
 2 |    sphinx-quickstart on Mon Jun 15 11:57:18 2020.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | SoundPy v0.1.0a3
 7 | ================  
 8 | 
 9 | Welcome to the docs!
10 | --------------------
11 | 
12 |    
13 | SoundPy is a research-based Python package_ for exploring and experimenting with sound and deep learning. NOTE: SoundPy is in the alpha stage of development; please forgive any bugs that pop up, and also feel free/encouraged to open an issue_. 
14 | 
15 | Those who might find this useful: 
16 | 
17 | * speech and sound enthusiasts
18 | * digital signal processing / mathematics / physics / acoustics enthusiasts
19 | * deep learning enthusiasts
20 | * researchers
21 | * linguists
22 | * psycholinguists
23 | 
24 | The main goal of SoundPy is to provide the code and functionality with more context via visualization, research, and mathematics. Most of the resources used to build the functionality stem from publicly available research and datasets.
25 | 
26 | As it covers quite a large range, from audio file conversion to implementation of trained neural networks, the purpose of SoundPy is not to be the perfect implementation of all functions (although that is also a goal :P ), but rather a peek into how they *can* be implemented, hopefully offering people a foundation for trying out different ways of implementation (feature extraction, building neural networks, etc.).
27 | 
28 | This project is still in the beginning stages and has a lot of room for growth, especially with contributors having a background / knowledge in data science, computer science, machine and deep learning, physics, acoustics, or dsp. Contributors from other backgrounds are also welcome! If you'd like SoundPy to do something it doesn't, try making it or create an issue.
29 | 
30 | .. _PyPI: https://pypi.org/project/soundpy/
31 | 
32 | .. _package: https://github.com/a-n-rose/Python-Sound-Tool/tree/development
33 | 
34 | .. _issue: https://github.com/a-n-rose/Python-Sound-Tool/issues
35 | 
36 | .. toctree:: 
37 |    :maxdepth: 2
38 |    
39 |    example_cases.rst
40 |    readme.rst
41 | 
42 |    
43 | .. toctree:: 
44 |    :maxdepth: 1
45 |    
46 |    changelog.rst
47 |    
48 | * :ref:`genindex`
49 | * :ref:`modindex`
50 | * :ref:`search`
51 | 
52 | :Author:
53 |     Aislyn Rose 
54 |     
55 |     rose.aislyn.noelle@gmail.com
56 |     
57 |     webpage_
58 |     
59 |     github_
60 |     
61 | .. _webpage: https://a-n-rose.github.io/
62 |  
63 | .. _github : https://github.com/a-n-rose
64 | 


--------------------------------------------------------------------------------
/docs/source/model_dataprep.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Feeding large datasets to models
 3 | --------------------------------
 4 | 
 5 | .. autoclass:: soundpy.models.dataprep.Generator
 6 |    :members:
 7 |    :undoc-members:
 8 |    :show-inheritance:
 9 |    
10 |    .. automethod:: __init__
11 | 
12 |    
13 | .. automodule:: soundpy.models.dataprep
14 |    :members:
15 |    :undoc-members:
16 |    :show-inheritance:
17 | 


--------------------------------------------------------------------------------
/docs/source/modelsetup.rst:
--------------------------------------------------------------------------------
1 | 
2 | Additional model setup (e.g. Early Stopping)
3 | --------------------------------------------
4 | 
5 | .. automodule:: soundpy.models.modelsetup
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/modules.rst:
--------------------------------------------------------------------------------
 1 | ==============================
 2 | SoundPy Functionality v0.1.0a3
 3 | ==============================
 4 | 
 5 | .. include:: builtin_sp.rst
 6 | 
 7 | .. include:: builtin_spdl.rst
 8 | 
 9 | .. include:: augment.rst
10 | 
11 | .. include:: files.rst
12 | 
13 | .. include:: datasets.rst
14 | 
15 | .. include:: dsp.rst
16 | 
17 | .. include:: filters.rst
18 | 
19 | .. include:: feats.rst
20 | 
21 | .. include:: template_models.rst
22 | 
23 | .. include:: modelsetup.rst
24 | 
25 | .. include:: model_dataprep.rst
26 | 
27 | .. include:: utils.rst
28 | 
29 | .. include:: exceptions.rst
30 | 


--------------------------------------------------------------------------------
/docs/source/readme.rst:
--------------------------------------------------------------------------------
1 | .. include:: modules.rst
2 | 
3 | 
4 | 


--------------------------------------------------------------------------------
/docs/source/template_models.rst:
--------------------------------------------------------------------------------
1 | Template deep neural networks
2 | -----------------------------
3 | 
4 | .. automodule:: soundpy.models.template_models
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/utils.rst:
--------------------------------------------------------------------------------
1 | 
2 | Other useful non-specific functionality
3 | ---------------------------------------
4 | 
5 | .. automodule:: soundpy.utils
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/versions.rst:
--------------------------------------------------------------------------------
 1 | ******************************************
 2 | SoundPy Versions Available as PyPI Package
 3 | ******************************************
 4 | 
 5 | .. toctree::
 6 |     :maxdepth: 1
 7 | 
 8 |     0.1.0a2/index.rst
 9 | 
10 |     0.1.0a3/index.rst
11 | 
12 | 


--------------------------------------------------------------------------------
/new_version_updates.md:
--------------------------------------------------------------------------------
 1 | # Updates of v0.1.0a3 release:
 2 | 
 3 | ## Updates
 4 | - don't use librosa for feature extraction anymore. But compatible with previous versions.
 5 | - parameter: frames_per_sample and context_window, with deprecation warning
 6 | Just remove these parameters from feature extraction and limit to generators. Otherwise too messy and complex
 7 | - soundpy.models.builtin.implement_denoiser() raise warning if cleaned features cannot be 
 8 | converted to raw audio samples.
 9 | - BUG FIX: soundpy.feats.plot can now be used from within generator using backend Agg and 
10 | then switch to the Tkinter backend using the use_tkinker parameter for normal use outside of training.
11 | - require additional tensors to be added to the desired shape and then supplied to generator to make shape process more explicit in generator.
12 | 
13 | changed parameter (Generator) normalized to normalize (opposite bool); removed add_tensor_last parameter, adjusted grayscale2color sections: can be applied to 2D data; set sr default to 22050
14 | 
15 | - Got the augment cnn builtin functionality to run with pre-trained features.. needs cleaning
16 | - got plotsound, plot vad, and plot dom freq, to work with stereo sound
17 | 
18 | Removing from envclassifier_extract_train:
19 |     dataset_dict = None,
20 |     num_labels = None,
21 | 
22 | 
23 | ## Updates of v0.1.0a2 release:
24 | 
25 | ### Updated Dependencies
26 | - Updated dependencies to newest versions still compatible with Tensorflow 2.1.0
27 | - Note: bug in training with generators occurs with Tensorflow 2.2.0+. Models trained via generators fail to learn. Therefore, Tensorflow is limited to version 2.1.0 until that bug is fixed. 
28 | 
29 | ### GPU option added
30 | - provide instructions for running Docker image for GPU
31 | 
32 | ### soundpy.dsp.vad
33 | - add `use_beg_ms` parameter: improved VAD recognition of silences post speech.
34 | - raise warning for sample rates lower than 44100 Hz. VAD seems to fail at lower sample rates.
35 | 
36 | ### soundpy.feats.get_vad_samples and soundpy.feats.get_vad_stft
37 | - moved from dsp module to the feats module
38 | - add `extend_window_ms` parameter: can extend VAD window if desired. Useful in higher SNR environments.
39 | - raise warning for sample rates lower than 44100 Hz. VAD seems to fail at lower sample rates.
40 | 
41 | ### added soundpy.feats.get_samples_clipped and soundpy.feats.get_stft_clipped
42 | - another option for VAD 
43 | - clips beginning and ending of audio data where high energy sound starts and ends.
44 | 
45 | ### soundpy.models.dataprep.GeneratorFeatExtraction 
46 | - can extract and augment features from audio files as each audio file is fed to the model. 
47 | - example can be viewed: soundpy.models.builtin.envclassifier_extract_train
48 | - note: still very experimental
49 | 
50 | ### soundpy.dsp.add_backgroundsound
51 | - improvements in the smoothness of the added signal.
52 | - soundpy.dsp.clip_at_zero
53 | - improved soundpy.dsp.vad and soundpy.feats.get_vad_stft
54 | 
55 | ### soundpy.feats.normalize 
56 | - can use it: soundpy.normalize (don't need to remember dsp or feats)
57 | 
58 | ### soundpy.dsp.remove_dc_bias
59 | - implemented in soundpy.files.loadsound() and soundpy.files.savesound()
60 | - vastly improves the ability to work with and combine signals.
61 | 
62 | ### soundpy.dsp.clip_at_zero
63 | - clips beginning and ending audio at zero crossings (at negative to positive zero crossings)
64 | - useful when concatenating signals
65 | - useful for removing clicks at beginning or ending of audio signals
66 | 
67 | ### soundpy.dsp.apply_sample_length
68 | - can now mirror the sound as a form of sound extension with parameter `mirror_sound`.
69 | 
70 | ### Removed soundpy_online (and therefore mybinder as well)
71 | - for the time being, this is too much work to keep up. Eventually plan on bringing this back in a more maintainable manner.
72 | 
73 | ### Added stereo sound functionality to the following functions:
74 | - soundpy.dsp.add_backgroundsound
75 | - soundpy.dsp.clip_at_zero
76 | - soundpy.dsp.calc_fft
77 | - soundpy.feats.get_stft
78 | - soundpy.feats.get_vad_stft
79 | 
80 | ### New functions related to stereo sound
81 | - soundpy.dsp.ismono for checking if a signal is mono or stereo
82 | - soundpy.dsp.average_channels for averaging amplitude in all channels (e.g. identifying when energetic sounds start / end: want to consider all channels)
83 | - soundpy.dsp.add_channels for adding additional channels if needed (e.g. for applying a 'hann' or 'hamming' window to stereo sound)
84 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | tensorflow>=2.1.0
 2 | numpy
 3 | scipy
 4 | scikit-learn
 5 | librosa
 6 | python-speech-features
 7 | matplotlib
 8 | soundfile
 9 | numba
10 | scikit-image>=0.17.2
11 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | from setuptools import setup, find_packages
 3 | 
 4 | # The directory containing this file
 5 | HERE = pathlib.Path(__file__).parent
 6 | 
 7 | # The text of the README file
 8 | README = (HERE / "README.md").read_text()
 9 | 
10 | dependencies=''
11 | with open("requirements.txt","r") as f:
12 |         dependencies = f.read().splitlines()
13 | 
14 | # This call to setup() does all the work
15 | setup(
16 |     name="soundpy",
17 |     version="0.1.0a3",
18 |     description="A research-based framework for exploring sound as well as machine learning in the context of sound.",
19 |     long_description=README,
20 |     long_description_content_type="text/markdown",
21 |     url="https://github.com/a-n-rose/Python-Sound-Tool",
22 |     author="Aislyn Rose",
23 |     author_email="rose.aislyn.noelle@gmail.com",
24 |     license="AGPL-3.0",
25 |     classifiers=[
26 |         "License :: OSI Approved :: GNU Affero General Public License v3",
27 |         "Programming Language :: Python :: 3",
28 |         "Programming Language :: Python :: 3.6",
29 |         "Programming Language :: Python :: 3.8",
30 |     ],
31 |     packages=find_packages(exclude=("tests","docs", "jupyter_notebooks")),
32 |     include_package_data=True,
33 |     install_requires=dependencies,
34 |     python_requires=">=3.6.9",
35 | )
36 | 


--------------------------------------------------------------------------------
/soundpy/__init__.py:
--------------------------------------------------------------------------------
 1 | ###############################################################################
 2 | from . import utils
 3 | from . import feats
 4 | from . import files
 5 | from . import datasets
 6 | from . import filters
 7 | from . import dsp
 8 | from . import builtin
 9 | from . import exceptions as errors
10 | from . import augment
11 | from .utils import check_dir, string2pathlib
12 | from .files import loadsound, savesound
13 | from .feats import plotsound, normalize
14 | from .filters import WienerFilter, BandSubtraction
15 | from .dsp import generate_sound, generate_noise
16 | from .builtin import envclassifier_feats, denoiser_feats, filtersignal 
17 | 
# Names exported by ``from soundpy import *``. Every entry must be bound in
# this module (or be an importable submodule); an entry that is neither makes
# the star-import raise AttributeError.
__all__ = ['utils', 'feats', 'filters', 'WienerFilter', 'BandSubtraction',
           'filtersignal', 'dsp', 'errors', 'plotsound', 'loadsound', 'savesound',
           'datasets', 'envclassifier_feats', 'denoiser_feats', 'generate_sound',
           'generate_noise', 'builtin', 'augment', 'check_dir', 'string2pathlib',
           'normalize']
23 | 


--------------------------------------------------------------------------------
/soundpy/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/soundpy/__init__.pyc


--------------------------------------------------------------------------------
/soundpy/exceptions.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`soundpy.exceptions` module includes customized errors.
 3 | """
 4 | 
 5 | def notsufficientdata_error(numtrain, numval, numtest, expected_numtrain):
 6 |     raise ValueError('Not enough training data:'+\
 7 |         '\nNumber train samples: {} '.format(numtrain)+\
 8 |             '(Minumum expected: {})'.format(expected_numtrain)+\
 9 |         '\nNumber val samples: {}'.format(numval)+\
10 |             '\nNumber test samples: {}'.format(numtest) +\
11 |                 '\n\nPlease lower `perc_train` or collect more audio data.')
12 | 
def numfeatures_incompatible_templatemodel():
    '''Raises a ValueError stating the feature count does not fit the template model.

    Raises
    ------
    ValueError
        Always. This function never returns.
    '''
    message = ''.join([
        'ERROR: Number of features is incompatible with the template model. ',
        'Try a higher number or rely on the defaults. Apologies for this inconvenience.',
    ])
    raise ValueError(message)
16 | 


--------------------------------------------------------------------------------
/soundpy/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .dataprep import Generator, GeneratorFeatExtraction, make_gen_callable
 2 | from .template_models import cnn_classifier, autoencoder_denoise, resnet50_classifier, \
 3 |     cnnlstm_classifier
 4 | from .modelsetup import setup_callbacks, setup_layers
 5 | from . import plot
 6 | from . import builtin
 7 | from .builtin import denoiser_train, envclassifier_train, denoiser_run, cnnlstm_train, \
 8 |      resnet50_train, envclassifier_extract_train, cnnlstm_extract_train, envclassifier_run
 9 | 
# Names exported by ``from soundpy.models import *``; each entry is imported
# at the top of this module.
__all__ = ['Generator', 'GeneratorFeatExtraction', 
           'cnn_classifier', 'autoencoder_denoise', 'resnet50_classifier',
           'setup_callbacks', 'plot', 'cnnlstm_classifier', 'builtin', 'denoiser_train',
           'envclassifier_train', 'denoiser_run', 'cnnlstm_train', 'resnet50_train',
           'envclassifier_extract_train','make_gen_callable', 'setup_layers',
           'cnnlstm_extract_train', 'envclassifier_run']
16 | 


--------------------------------------------------------------------------------
/soundpy/models/plot.py:
--------------------------------------------------------------------------------
 1 | import tensorflow
 2 | from tensorflow.keras.models import Model
 3 | from tensorflow.keras.models import load_model 
 4 | import numpy as np
 5 | import matplotlib.pyplot as plt
 6 | 
 7 | import os, sys
 8 | import inspect
 9 | currentdir = os.path.dirname(os.path.abspath(
10 |     inspect.getfile(inspect.currentframe())))
11 | packagedir = os.path.dirname(currentdir)
12 | sys.path.insert(0, packagedir)
13 | import soundpy as pyst
14 | 
15 | 
def featuremaps(features, model, image_dir='./feature_maps/'):
    '''Saves the feature maps of each convolutional layer as .png file.

    For every layer of `model` whose name contains 'conv', a sub-model ending
    at that layer is built and run on `features`. Each channel (last axis) of
    the first sample of the output is saved as a grayscale image at
    ``<image_dir>/layer_<layer index>/featmap_<channel index>.png``.

    Parameters
    ----------
    features
        Input handed unchanged to the sub-model's `predict`.
        NOTE(review): the output is indexed as ``[0, :, :, i]``, so the
        convolutional outputs are assumed to be 4-dimensional - confirm.
    model
        Model exposing `layers` and `inputs` (e.g. a tf.keras model).
    image_dir : str or pathlib.Path
        Directory the images are written to; created if necessary.
        (default './feature_maps/')

    Returns
    -------
    None

    References
    ----------
    Brownlee, Jason (2019, May, 6). How to Visualize Filters and Feature
    Maps in Convolutional Neural Networks. Machine Learning Mastery.
    https://machinelearningmastery.com/how-to-visualize-filters-and-feature-maps-in-convolutional-neural-networks/
    '''
    # The module-level import binds soundpy to the alias `pyst`, while this
    # function refers to `sp`; bind `sp` locally so the calls below resolve.
    import soundpy as sp

    # The output directory does not change between iterations: resolve it once.
    image_dir = sp.utils.check_dir(image_dir, make=True)
    conv_idx = [i for i, layer in enumerate(model.layers)
                if 'conv' in layer.name]
    for idx in conv_idx:
        model_featmaps = Model(inputs = model.inputs,
                    outputs = model.layers[idx].output)
        layer_maps = model_featmaps.predict(features)
        layer_dir = sp.utils.check_dir(
            image_dir.joinpath('layer_{}'.format(idx)), make=True)
        for i in range(layer_maps.shape[-1]):
            plt.clf()
            plt.imshow(layer_maps[0,:,:,i], cmap='gray')
            plt.savefig(layer_dir.joinpath('featmap_{}.png'.format(i)))
42 | 


--------------------------------------------------------------------------------
/soundpy/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/soundpy/utils.pyc


--------------------------------------------------------------------------------
/start_jup_env.sh:
--------------------------------------------------------------------------------
# Starts an interactive, throw-away (--rm) container from the `aju` image with:
#   * all GPUs exposed and privileged mode enabled,
#   * the current working directory bind-mounted at /root/soundpy/,
#   * container port 8888 published on host port 8888
#     (presumably the Jupyter server, going by the script name - confirm).
docker run -it --rm \
            --gpus all \
            --privileged=true \
            -v "$PWD":"/root/soundpy/" \
            -p 8888:8888 aju
            # Optional extra bind mount for an external audio directory (disabled):
            #-v "/audiodir/data":"/root/soundpy/data" \
7 | 


--------------------------------------------------------------------------------
/tests/inspect_functions.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | From NLTK decorators: https://github.com/nltk/nltk/blob/develop/nltk/decorators.py
  3 | 
  4 | """
  5 | Decorator module by Michele Simionato <michelesimionato@libero.it>
  6 | Copyright Michele Simionato, distributed under the terms of the BSD License (see below).
  7 | http://www.phyast.pitt.edu/~micheles/python/documentation.html
  8 | Included in NLTK for its support of a nice memoization decorator.
  9 | """
 10 | '''
 11 | 
 12 | 
 13 | import inspect
 14 | 
 15 | def __legacysignature(signature):
 16 |     """
 17 |     For retrocompatibility reasons, we don't use a standard Signature.
 18 |     Instead, we use the string generated by this method.
 19 |     Basically, from a Signature we create a string and remove the default values.
 20 |     """
 21 |     listsignature = str(signature)[1:-1].split(",")
 22 |     for counter, param in enumerate(listsignature):
 23 |         if param.count("=") > 0:
 24 |             listsignature[counter] = param[0:param.index("=")].strip()
 25 |         else:
 26 |             listsignature[counter] = param.strip()
 27 |     return ", ".join(listsignature)
 28 | 
 29 | def getinfo(func):
 30 |     """
 31 |     Returns an info dictionary containing:
 32 |     - name (the name of the function : str)
 33 |     - argnames (the names of the arguments : list)
 34 |     - defaults (the values of the default arguments : tuple)
 35 |     - signature (the signature : str)
 36 |     - fullsignature (the full signature : Signature)
 37 |     - doc (the docstring : str)
 38 |     - module (the module name : str)
 39 |     - dict (the function __dict__ : str)
 40 |     >>> def f(self, x=1, y=2, *args, **kw): pass
 41 |     >>> info = getinfo(f)
 42 |     >>> info["name"]
 43 |     'f'
 44 |     >>> info["argnames"]
 45 |     ['self', 'x', 'y', 'args', 'kw']
 46 |     >>> info["defaults"]
 47 |     (1, 2)
 48 |     >>> info["signature"]
 49 |     'self, x, y, *args, **kw'
 50 |     >>> info["fullsignature"]
 51 |     <Signature (self, x=1, y=2, *args, **kw)>
 52 |     """
 53 |     assert inspect.ismethod(func) or inspect.isfunction(func)
 54 |     argspec = inspect.getfullargspec(func)
 55 |     regargs, varargs, varkwargs = argspec[:3]
 56 |     argnames = list(regargs)
 57 |     if varargs:
 58 |         argnames.append(varargs)
 59 |     if varkwargs:
 60 |         argnames.append(varkwargs)
 61 |     fullsignature = inspect.signature(func)
 62 |     # Convert Signature to str
 63 |     signature = __legacysignature(fullsignature)
 64 | 
 65 | 
 66 |     # pypy compatibility
 67 |     if hasattr(func, "__closure__"):
 68 |         _closure = func.__closure__
 69 |         _globals = func.__globals__
 70 |     else:
 71 |         _closure = func.func_closure
 72 |         _globals = func.func_globals
 73 | 
 74 |     return dict(
 75 |         name=func.__name__,
 76 |         argnames=argnames,
 77 |         signature=signature,
 78 |         fullsignature=fullsignature,
 79 |         defaults=func.__defaults__,
 80 |         doc=func.__doc__,
 81 |         module=func.__module__,
 82 |         dict=func.__dict__,
 83 |         globals=_globals,
 84 |         closure=_closure,
 85 |     )
 86 | 
 87 | ##########################     LEGALESE    ###############################
 88 | 
 89 | ##   Redistributions of source code must retain the above copyright
 90 | ##   notice, this list of conditions and the following disclaimer.
 91 | ##   Redistributions in bytecode form must reproduce the above copyright
 92 | ##   notice, this list of conditions and the following disclaimer in
 93 | ##   the documentation and/or other materials provided with the
 94 | ##   distribution.
 95 | 
 96 | ##   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 97 | ##   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 98 | ##   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 99 | ##   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
100 | ##   HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
101 | ##   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
102 | ##   BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
103 | ##   OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
104 | ##   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
105 | ##   TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
106 | ##   USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
107 | ##   DAMAGE.
108 | 


--------------------------------------------------------------------------------
/tests/utils_test.py:
--------------------------------------------------------------------------------
  1 | import os, sys
  2 | import inspect
  3 | currentdir = os.path.dirname(os.path.abspath(
  4 |     inspect.getfile(inspect.currentframe())))
  5 | parentdir = os.path.dirname(currentdir)
  6 | sys.path.insert(0, parentdir)
  7 | 
  8 | import numpy as np
  9 | import pytest
 10 | import librosa
 11 | import pathlib
 12 | import soundpy as sp
 13 | 
 14 | audio_dir = 'test_audio/'
 15 | test_audiofile = '{}audio2channels.wav'.format(audio_dir)
 16 | 
 17 | 
 18 | 
 19 | def test_path_or_samples_str():
 20 |     item_type = sp.utils.path_or_samples(test_audiofile)
 21 |     assert item_type == 'path'
 22 |     
 23 | def test_path_or_samples_pathlib():
 24 |     item_type = sp.utils.path_or_samples(pathlib.Path(test_audiofile))
 25 |     assert item_type == 'path'
 26 |     
 27 | def test_path_or_samples_tuple_librosa():
 28 |     item = librosa.load(test_audiofile)
 29 |     item_type = sp.utils.path_or_samples(item)
 30 |     assert item_type == 'samples'
 31 |     
 32 | def test_path_or_samples_tuple_not_real_samples():
 33 |     item = (np.ndarray([1,2,3]), 4)
 34 |     item_type = sp.utils.path_or_samples(item)
 35 |     assert item_type == 'samples'
 36 |     
 37 | def test_path_or_samples_str_not_real_path():
 38 |     print('IF TEST FAILES: For now, function does not test for path validity.')
 39 |     with pytest.raises(ValueError):
 40 |         item_type = sp.utils.path_or_samples('blah')
 41 |     
 42 | def test_path_or_samples_pathlib_not_real_path():
 43 |     print('IF TEST FAILES: For now, function does not test for path validity.')
 44 |     with pytest.raises(ValueError):
 45 |         item_type = sp.utils.path_or_samples(pathlib.Path('blah'))
 46 |     
 47 | def test_match_dtype_float2int():
 48 |     array_original = np.array([1,2,3,4])
 49 |     array_to_change = np.array([1.,2.,3.,4.,5.])
 50 |     array_adjusted = sp.utils.match_dtype(array_to_change, array_original)
 51 |     assert array_original.dtype == array_adjusted.dtype
 52 |     assert len(array_to_change) == len(array_adjusted)
 53 |     assert np.array_equal(array_to_change, array_adjusted)
 54 |     assert array_to_change.dtype != array_original.dtype
 55 |     
 56 | def test_match_dtype_int2float():
 57 |     array_original = np.array([1.,2.,3.,4.])
 58 |     array_to_change = np.array([1,2,3,4,5])
 59 |     array_adjusted = sp.utils.match_dtype(array_to_change, array_original)
 60 |     assert array_original.dtype == array_adjusted.dtype
 61 |     assert len(array_to_change) == len(array_adjusted)
 62 |     assert np.array_equal(array_to_change, array_adjusted)
 63 |     assert array_to_change.dtype != array_original.dtype
 64 |     
 65 | def test_shape_samps_channels_too_many_dimensions():
 66 |     input_data = np.array([1,2,3,4,5,6,7,8,9,10,11,12]).reshape(2,3,2)
 67 |     with pytest.raises(ValueError):
 68 |         output_data = sp.dsp.shape_samps_channels(input_data)
 69 | 
 70 | def test_check_dir_default_create():
 71 |     test_dir = './testtesttest/'
 72 |     test_dir = sp.utils.check_dir(test_dir)
 73 |     assert isinstance(test_dir, pathlib.PosixPath)
 74 |     assert os.path.exists(test_dir)
 75 |     os.rmdir(test_dir)
 76 |     
 77 | def test_check_dir_check_exists():
 78 |     test_dir = './testtesttest/'
 79 |     test_dir = sp.utils.check_dir(test_dir, make=True)
 80 |     test_dir = sp.utils.check_dir(test_dir, make=False)
 81 |     assert isinstance(test_dir, pathlib.PosixPath)
 82 |     assert os.path.exists(test_dir)
 83 |     os.rmdir(test_dir)
 84 |     
 85 | def test_check_dir_check_exists_raiseerror():
 86 |     test_dir = './testtesttest/'
 87 |     with pytest.raises(FileNotFoundError):
 88 |         test_dir = sp.utils.check_dir(test_dir, make=False)
 89 |     
 90 | def test_check_dir_check_exists_notwriteinto_raiseerror():
 91 |     test_dir = './testtesttest/'
 92 |     test_dir = sp.utils.check_dir(test_dir, make=True)
 93 |     with pytest.raises(FileExistsError):
 94 |         test_dir = sp.utils.check_dir(test_dir, make=False, append=False)
 95 |     os.rmdir(test_dir)
 96 |     
 97 | def test_check_dir_pathwithextension_raiseerror():
 98 |     test_dir = './testtesttest.py/'
 99 |     with pytest.raises(TypeError):
100 |         test_dir = sp.utils.check_dir(test_dir, make=False)
101 |         
def test_string2list():
    '''The str() form of the collected audio files is restored to the original value.'''
    collected = sp.files.collect_audiofiles(
        audio_dir, wav_only=False, recursive=False)
    restored = sp.utils.restore_dictvalue(str(collected))
    assert collected == restored
108 |     
def test_string2list_loaddict():
    '''Audio files saved with save_dict and read back with load_dict can be restored.'''
    audiofiles = sp.files.collect_audiofiles(audio_dir,wav_only=False,
                                         recursive=False)
    d = dict([(0,audiofiles)])
    test_dict_path = 'testest.csv'
    # Start from a clean slate in case an earlier run left the file behind.
    if os.path.exists(test_dict_path):
        os.remove(test_dict_path)
    try:
        d_path = sp.utils.save_dict(
            dict2save = d,
            filename = test_dict_path)
        d_loaded = sp.utils.load_dict(d_path)
        # The saved dict holds a single entry; use the last key that was loaded.
        key = list(d_loaded)[-1]
        audiofiles_string = d_loaded[key]
        audiofiles_checked = sp.utils.restore_dictvalue(audiofiles_string)
        assert audiofiles ==  audiofiles_checked
    finally:
        # Remove the temporary file even when the test fails.
        if os.path.exists(test_dict_path):
            os.remove(test_dict_path)
126 |     
def test_restore_dictvalue_list_of_tuples():
    # TODO: placeholder - no assertions yet, so this test always passes.
    pass
129 | 
def test_restore_dictvalue_regular_string():
    '''An ordinary string is returned as is.'''
    original = 'hann'
    restored = sp.utils.restore_dictvalue(original)
    assert original == restored
134 | 
def test_restore_dictvalue_None():
    '''The string form of None is restored to None.'''
    original = None
    restored = sp.utils.restore_dictvalue(str(original))
    assert original == restored
140 | 
def test_restore_dictvalue_True():
    '''The string form of True is restored to True.'''
    original = True
    restored = sp.utils.restore_dictvalue(str(original))
    assert original == restored
146 |     
def test_restore_dictvalue_False():
    '''The string form of False is restored to False.'''
    original = False
    restored = sp.utils.restore_dictvalue(str(original))
    assert original == restored
152 | 
def test_restore_dictvalue_int():
    '''The string form of an integer is restored to an equal value.'''
    original = 1
    restored = sp.utils.restore_dictvalue(str(original))
    assert original == restored
158 | 
def test_restore_dictvalue_float():
    '''The string form of a float is restored to an equal value.'''
    original = 1.0
    restored = sp.utils.restore_dictvalue(str(original))
    assert original == restored
164 | 
def test_restore_dictvalue_tuple():
    '''The string form of a tuple of ints is restored to the same tuple.'''
    original = (3, 4)
    restored = sp.utils.restore_dictvalue(str(original))
    assert original == restored
170 |     
def test_restore_dictvalue_list_of_pathwaystrings():
    '''The string form of a list of filename strings is restored to the same list.'''
    original = ['audio{}.wav'.format(num) for num in (1, 2, 3)]
    restored = sp.utils.restore_dictvalue(str(original))
    assert original == restored
176 |     
def test_restore_dictvalue_list_of_pathlib_ojbect_strings():
    '''The string form of a list of pathlib objects is restored to equal pathlib objects.'''
    original = [pathlib.Path('audio{}.wav'.format(num)) for num in (1, 2, 3)]
    restored = sp.utils.restore_dictvalue(str(original))
    assert original == restored
182 |     
def test_restore_dictvalue_list_of_pathwaystrings_nested():
    '''The string form of nested lists of filename strings is restored unchanged.'''
    first_group = ['audio1.wav', 'audio2.wav']
    second_group = ['audio3.wav']
    original = [first_group, second_group]
    restored = sp.utils.restore_dictvalue(str(original))
    assert original == restored
188 |     
def test_restore_dictvalue_list_of_pathlib_ojbect_strings_nested():
    '''Nested lists of pathlib objects are expected to raise ValueError.'''
    first_group = [pathlib.Path('audio1.wav'), pathlib.Path('audio2.wav')]
    second_group = [pathlib.Path('audio3.wav')]
    as_string = str([first_group, second_group])
    with pytest.raises(ValueError):
        sp.utils.restore_dictvalue(as_string)
194 |     
def test_restore_dictvalue_tuple_labeledpaths():
    '''The string form of (label, filename string) pairs is restored unchanged.'''
    original = [(num, 'audio{}.wav'.format(num)) for num in (1, 2, 3)]
    restored = sp.utils.restore_dictvalue(str(original))
    assert original == restored
200 |     
def test_restore_dictvalue_tuple_labeled_pathlibojbects():
    '''The string form of (label, pathlib object) pairs is restored to equal pairs.'''
    original = [(num, pathlib.Path('audio{}.wav'.format(num)))
                for num in (1, 2, 3)]
    restored = sp.utils.restore_dictvalue(str(original))
    assert original == restored
206 | 


--------------------------------------------------------------------------------
/tests_requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | 


--------------------------------------------------------------------------------