├── .gitignore
├── Copying.docx
├── Dockerfile
├── GNU_AGPL_full.docx
├── LICENSE.md
├── README.md
├── THIRD-PARTY-NOTICES.docx
├── TODO.md
├── audiodata
├── background_samples
│ ├── README.md
│ ├── cafe.wav
│ ├── fridge.wav
│ └── traffic.wav
├── car_horn.wav
├── models
│ └── denoiser
│ │ ├── example_denoiser_stft.h5
│ │ ├── log.csv
│ │ └── log_extraction_settings.csv
└── python.wav
├── build_aju_image.sh
├── doc_requirements.txt
├── docs
└── source
│ ├── 0.1.0a2
│ ├── augment.rst
│ ├── builtin_sp.rst
│ ├── builtin_spdl.rst
│ ├── changelog.rst
│ ├── conf.py
│ ├── datasets.rst
│ ├── dsp.rst
│ ├── example_cases.rst
│ ├── examples
│ │ ├── README.txt
│ │ ├── plot_SNR_add_noise_to_datasets.py
│ │ ├── plot_augment_sound.py
│ │ ├── plot_dataset_info_formatting.py
│ │ ├── plot_featureprep_denoiser.py
│ │ ├── plot_featureprep_envclassifier.py
│ │ ├── plot_filter_out_noise.py
│ │ ├── plot_implement_denoiser.py
│ │ ├── plot_signals_and_features.py
│ │ ├── plot_train_classifier.py
│ │ ├── plot_train_denoiser.py
│ │ └── plot_vad_snr_filter.py
│ ├── exceptions.rst
│ ├── feats.rst
│ ├── files.rst
│ ├── filters.rst
│ ├── index.rst
│ ├── model_dataprep.rst
│ ├── modelsetup.rst
│ ├── modules.rst
│ ├── readme.rst
│ ├── template_models.rst
│ └── utils.rst
│ ├── 0.1.0a3
│ ├── augment.rst
│ ├── builtin_sp.rst
│ ├── builtin_spdl.rst
│ ├── changelog.rst
│ ├── conf.py
│ ├── datasets.rst
│ ├── dsp.rst
│ ├── example_cases.rst
│ ├── examples
│ │ ├── README.txt
│ │ ├── plot_SNR_add_noise_to_datasets.py
│ │ ├── plot_augment_sound.py
│ │ ├── plot_dataset_info_formatting.py
│ │ ├── plot_extract_augment_train_classifier.py
│ │ ├── plot_featureprep_denoiser.py
│ │ ├── plot_featureprep_envclassifier.py
│ │ ├── plot_filter_out_noise.py
│ │ ├── plot_implement_denoiser.py
│ │ ├── plot_signals_and_features.py
│ │ ├── plot_train_classifier.py
│ │ ├── plot_train_denoiser.py
│ │ └── plot_vad_snr_filter.py
│ ├── exceptions.rst
│ ├── feats.rst
│ ├── files.rst
│ ├── filters.rst
│ ├── index.rst
│ ├── model_dataprep.rst
│ ├── modelsetup.rst
│ ├── modules.rst
│ ├── readme.rst
│ ├── template_models.rst
│ ├── utils.rst
│ └── versions.rst
│ ├── augment.rst
│ ├── builtin_sp.rst
│ ├── builtin_spdl.rst
│ ├── changelog.rst
│ ├── conf.py
│ ├── datasets.rst
│ ├── dsp.rst
│ ├── example_cases.rst
│ ├── examples
│ ├── README.txt
│ ├── plot_SNR_add_noise_to_datasets.py
│ ├── plot_augment_sound.py
│ ├── plot_dataset_info_formatting.py
│ ├── plot_extract_augment_train_classifier.py
│ ├── plot_featureprep_denoiser.py
│ ├── plot_featureprep_envclassifier.py
│ ├── plot_filter_out_noise.py
│ ├── plot_implement_denoiser.py
│ ├── plot_signals_and_features.py
│ ├── plot_train_classifier.py
│ ├── plot_train_denoiser.py
│ └── plot_vad_snr_filter.py
│ ├── exceptions.rst
│ ├── feats.rst
│ ├── files.rst
│ ├── filters.rst
│ ├── index.rst
│ ├── model_dataprep.rst
│ ├── modelsetup.rst
│ ├── modules.rst
│ ├── readme.rst
│ ├── template_models.rst
│ ├── utils.rst
│ └── versions.rst
├── jupyter_notebooks
├── augment_sound_machine_learning.ipynb
├── filter_out_noise.ipynb
├── generate_signals_noise_snr.ipynb
├── implement_denoiser.ipynb
├── plot_vad_snr_filter.ipynb
└── speech_noise_SNR.ipynb
├── new_version_updates.md
├── requirements.txt
├── setup.py
├── soundpy
├── __init__.py
├── __init__.pyc
├── augment.py
├── builtin.py
├── datasets.py
├── dsp.py
├── exceptions.py
├── feats.py
├── files.py
├── filters.py
├── models
│ ├── __init__.py
│ ├── builtin.py
│ ├── dataprep.py
│ ├── modelsetup.py
│ ├── plot.py
│ └── template_models.py
├── utils.py
└── utils.pyc
├── start_jup_env.sh
├── tests
├── datasets_test.py
├── dsp_test.py
├── feats_test.py
├── filters_test.py
├── inspect_functions.py
└── utils_test.py
└── tests_requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | env/
2 | ve/
3 | __pycache__/
4 | saved_features_and_models/
5 | audiodata/
6 | images/
7 | images_1/
8 | audiodata2/
9 | audiodata3/
10 | .ipynb_checkpoints/
11 | env2/
12 | env3/
13 | docs/build/
14 | docs/doc_layout.md
15 | docs/Makefile
16 | docs/make.bat
17 | docs/source/auto_examples/
18 | example_dir/
19 | tests/testing_pypi/
20 | test_audio/
21 | compare_augmentations_right/
22 | compare_augmentations_nine/
23 | build/
24 | *.npy
25 | dev_env/
26 | docs/source/examples/example_feats_models/
27 | *.png
28 | example_feats_models/
29 | update_env/
30 | debug_env/
31 | p3_test/
32 |
33 |
--------------------------------------------------------------------------------
/Copying.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/Copying.docx
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM tensorflow/tensorflow:2.1.0-gpu-py3
2 |
3 | RUN apt update && apt upgrade -y
4 |
5 | RUN apt-get install -y libsndfile1
6 |
7 | RUN python -m pip install --upgrade pip
8 |
9 | RUN pip install -U soundfile \
10 | librosa \
11 | python_speech_features \
12 | notebook \
13 | matplotlib
14 |
15 | RUN pip install -U scikit-image
16 |
17 | RUN mkdir /root/soundpy/
18 |
19 | WORKDIR /root/soundpy/
20 |
--------------------------------------------------------------------------------
/GNU_AGPL_full.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/GNU_AGPL_full.docx
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | ## AGPL-3.0 License
2 |
3 | Copyright (c) 2020, Aislyn Rose.
4 |
5 | Permission to use, copy, modify, and/or distribute this software is granted
6 | under the terms of the GNU Affero General Public License as published by the
7 | Free Software Foundation, either version 3 of the License, or (at your option)
8 | any later version.
9 |
10 | The SoundPy framework is distributed in the hope that it will be useful, but
11 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 | more details.
14 |
--------------------------------------------------------------------------------
/THIRD-PARTY-NOTICES.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/THIRD-PARTY-NOTICES.docx
--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
1 |
2 | ## Current
3 | - make it easier to use / build different models
4 | - implement autoencoder model
5 | - implement denoising with autoencoder model
6 | - build autoencoder in keras
7 | - build autoencoder in pytorch
8 | - build via docker image
9 |
10 | ## Functionality
11 |
12 | - autoencoder training
13 | - get postfilter to work on spectral subtraction
14 | - set power_scale default to 'power_to_db'?
15 | - functions to use librosa or not to perform tasks (librosa doesn't work on notebooks.ai for example)
16 | - measure level of snr
17 | - measure quality of filtering/speech enhancement
18 | - measure signal similarity
19 | - source separation
20 | - gender switch
21 | - text to speech
22 | - speech to text
23 | - dataset exploration (visualize 10 random samples/ based on size?, etc.)
24 | - simple inclusion of noise reduction into training models
25 | - pysoundtool and pysoundtool.online version? (use librosa vs no librosa)
26 |
27 | ## Presentation
28 |
29 | - blog post on each set of functionalities
30 | - presentation of examples
31 | - get documentation online
32 | - simplify functions
33 | - improve documentation (references, examples, testing, data shapes!!, help options)
34 |
35 | ## Testing
36 |
37 | - expand test cases
38 | - efficiency of code
39 |
40 | ## Organization
41 |
42 | - reorganize based on use... how import statement should work
43 | - make sample_rate, samprate, samplingrate, sr namespace consistent
44 | - make features/feature_type namespace consistent
45 | - use keyword arguments for librosa and scipy?
46 | - simplify
47 |
48 |
49 | ## Organization ideas:
50 |
51 | pyst.loadsound(audiofile, sr)
52 | pyst.playsound(audiofile, sr)?
53 | pyst.plotsound(audiofile, sr, feature_type)
54 |
55 | pyst.data.train_val_test(input_data, output_data)
56 | pyst.data.analyze(audio_dir)? For example for audio types, lengths?, sizes? etc. Useful for logging?
57 | pyst.feats.plot()
58 | pyst.feats.hear()
59 | pyst.feats.extract()
60 | model = pyst.models.speechrec_simple() # model will be a class instance..
61 | history = pyst.models.train(model, train_path, val_path)
62 | matplotlib.pyplot.plot(history) ?
63 | pyst.models.plot(history)
64 | pyst.models.run(model, test_path)
65 |
66 | pyst.filters.wiener()
67 | pyst.filters.bandsubtraction()
68 | pyst.models.soundclassifier()
69 | pyst.models.autoencoder_denoise()
70 | pyst.models.speechrec()
71 |
--------------------------------------------------------------------------------
/audiodata/background_samples/README.md:
--------------------------------------------------------------------------------
1 | ## Background Noise Examples
2 |
3 | These sounds were downloaded from freesound.org and are licensed under the Creative Commons 0 License.
4 |
5 | They have been limited to 10 seconds and the sample rate reduced to 16 kHz to reduce their sizes.
6 |
7 | ### cafe.wav
8 |
9 | 387030__antonybk__cafe-takk-northern-quarter-manchester.wav
10 |
11 | ### traffic.wav
12 |
13 | 261344__ivolipa__city-traffic-day.wav
14 |
15 | ### fridge.wav
16 |
17 | 237399__squareal__fridge-tone.wav
18 |
--------------------------------------------------------------------------------
/audiodata/background_samples/cafe.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/background_samples/cafe.wav
--------------------------------------------------------------------------------
/audiodata/background_samples/fridge.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/background_samples/fridge.wav
--------------------------------------------------------------------------------
/audiodata/background_samples/traffic.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/background_samples/traffic.wav
--------------------------------------------------------------------------------
/audiodata/car_horn.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/car_horn.wav
--------------------------------------------------------------------------------
/audiodata/models/denoiser/example_denoiser_stft.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/models/denoiser/example_denoiser_stft.h5
--------------------------------------------------------------------------------
/audiodata/models/denoiser/log_extraction_settings.csv:
--------------------------------------------------------------------------------
1 | dur_sec,3
2 | feature_type,stft noisy
3 | feat_type,stft
4 | complex_vals,True
5 | sr,22050
6 | num_feats,177
7 | n_fft,352
8 | win_size_ms,16
9 | frame_length,352
10 | percent_overlap,0.5
11 | window,hann
12 | frames_per_sample,11
13 | labeled_data,False
14 | visualize,True
15 | input_shape,"(35, 11, 177)"
16 | desired_shape,"(385, 177)"
17 | use_librosa,True
18 | center,True
19 | mode,reflect
20 | subsection_data,True
21 | divide_factor,10
22 |
--------------------------------------------------------------------------------
/audiodata/python.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/audiodata/python.wav
--------------------------------------------------------------------------------
/build_aju_image.sh:
--------------------------------------------------------------------------------
1 | # chmod u+x build_aju_image.sh
2 |
3 | docker build . -t aju
4 |
--------------------------------------------------------------------------------
/doc_requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx-rtd-theme
2 | sphinx-gallery
3 | numpydoc
4 | pillow
5 | ipython
6 | pandas
7 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/augment.rst:
--------------------------------------------------------------------------------
1 |
2 | Augment audio data
3 | ------------------
4 |
5 | .. automodule:: soundpy.augment
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/builtin_sp.rst:
--------------------------------------------------------------------------------
1 |
2 | Built-In Functionality (non Deep Learning)
3 | ------------------------------------------
4 |
5 | .. automodule:: soundpy.builtin
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/builtin_spdl.rst:
--------------------------------------------------------------------------------
1 |
2 | Built-In Functionality (Deep Learning)
3 | --------------------------------------
4 |
5 | .. automodule:: soundpy.models.builtin
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/changelog.rst:
--------------------------------------------------------------------------------
1 | *********
2 | Changelog
3 | *********
4 |
5 | v0.1.0a
6 | =======
7 |
8 | v0.1.0a2
9 | --------
10 | 2020-08-13
11 |
12 |
13 | Bug fixes
14 | - added `use_beg_ms` parameter in `soundpy.dsp.vad`: improved VAD recognition of silences post speech.
15 |
16 | Features
17 | - added GPU option: provide instructions and Docker image for running SoundPy with GPU
18 | - added `extend_window_ms` parameter to `soundpy.feats.get_vad_samples` and `soundpy.feats.get_vad_stft`: can extend VAD window if desired. Useful in higher SNR environments.
19 | - added `soundpy.feats.get_samples_clipped` and `soundpy.feats.get_stft_clipped` to clip off beginning and ending silences.
20 | - added `beg_end_clipped` parameter to `soundpy.feats.plot_vad` to visualize VAD by clipping the beginning and ending silences (if True) or VAD instances throughout the signal (if False).
21 | - added `soundpy.models.dataprep.GeneratorFeatExtraction` class for extracting and augmenting features during training (still experimental).
22 | - added `soundpy.models.builtin.envclassifier_extract_train` as an example of extracting and augmenting features during training (still experimental).
23 | - added `soundpy.dsp.clip_at_zero` to enable smoother concatenations of signals and the removal of clicks at the beginning and ending of signals.
24 | - added `soundpy.dsp.remove_dc_bias` to enable smoother concatenations of signals
25 | - added and set `remove_dc` parameter to True in `soundpy.files.loadsound` and `soundpy.files.savesound` to ensure signals all have mean zero.
26 | - added `mirror_sound` option to `soundpy.dsp.apply_sample_length` as a way to extend sound.
27 | - added `soundpy.dsp.ismono` to check if samples were mono or stereo.
28 | - added `soundpy.dsp.average_channels` to average sample amplitudes across channels, e.g. to identify where high energy begins / ends in the signal without disregarding additional channels (if stereo sound).
29 | - added `soundpy.dsp.add_channels` for adding additional channels if needed (e.g. for applying a 'hann' or 'hamming' window to stereo sound)
30 | - added stereo sound functionality to `soundpy.dsp.add_backgroundsound`, `soundpy.dsp.clip_at_zero`, `soundpy.dsp.calc_fft`, `soundpy.feats.get_stft`, `soundpy.feats.get_vad_stft`
31 |
32 |
33 | Other changes
34 | - name change: from pysoundtool to soundpy: simpler
35 | - updated dependencies to newest versions still compatible with Tensorflow 2.1.0
36 | - moved `soundpy.dsp.get_vad_samples` to `soundpy.feats.get_vad_samples`
37 | - moved `soundpy.dsp.get_vad_stft` to `soundpy.feats.get_vad_stft`
38 | - name change: allow `soundpy.feats.normalize` to be used as `soundpy.normalize`
39 | - removed `pysoundtool_online` and mybinder button as maintaining the online version was not easily done. Aim to reimplement at some point.
40 |
41 |
42 |
43 | v0.1.0a1
44 | ========
45 |
46 | Initial public alpha release.
47 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/datasets.rst:
--------------------------------------------------------------------------------
1 |
2 | Organizing datasets
3 | -------------------
4 |
5 | .. automodule:: soundpy.datasets
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/dsp.rst:
--------------------------------------------------------------------------------
1 |
2 | Working with signals
3 | --------------------
4 |
5 | .. automodule:: soundpy.dsp
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/example_cases.rst:
--------------------------------------------------------------------------------
1 | .. toctree::
2 | :maxdepth: 2
3 |
4 | .. include:: auto_examples/index.rst
5 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/README.txt:
--------------------------------------------------------------------------------
1 |
2 | -----------------------------
3 | SoundPy Example Use Cases
4 | -----------------------------
5 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_SNR_add_noise_to_datasets.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 | """
4 | ==========================================
5 | Add Noise to Speech at Specific SNR Levels
6 | ==========================================
7 |
8 | Add noise to speech at specific signal-to-noise ratio levels.
9 |
10 | To see how soundpy implements this, see `soundpy.dsp.add_backgroundsound`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 |
17 |
18 | #####################################################################
19 | # Let's import soundpy, and ipd for playing audio data
20 | import soundpy as sp
21 | import IPython.display as ipd
22 |
23 |
24 | ######################################################
25 | # Define the speech and noise data samples
26 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
27 |
28 | ######################################################
29 | # I will use speech and noise data from the soundpy repo.
30 |
31 | ##########################################################
32 | # Designate path relevant for accessing audiodata
33 | sp_dir = '../../../'
34 |
35 | ##########################################################
36 | # Speech sample:
37 | speech_sample = '{}audiodata/python.wav'.format(sp_dir)
38 | speech_sample = sp.utils.string2pathlib(speech_sample)
39 | # as pathlib object, can do the following:
40 | word = speech_sample.stem
41 | word
42 |
43 | ##########################################################
44 | # Noise sample:
45 | noise_sample = '{}audiodata/background_samples/cafe.wav'.format(sp_dir)
46 | noise_sample = sp.utils.string2pathlib(noise_sample)
47 | # as pathlib object, can do the following:
48 | noise = noise_sample.stem
49 | noise
50 |
51 |
52 | ##########################################################
53 | # Hear Clean Speech
54 | # ~~~~~~~~~~~~~~~~~
55 | # I'm using a higher sample rate here as calculating SNR
56 | # performs best upwards of 44100 Hz.
57 | sr = 44100
58 | s, sr = sp.loadsound(speech_sample, sr = sr)
59 | ipd.Audio(s,rate=sr)
60 |
61 |
62 | ##########################################################
63 | # Hear Noise
64 | # ~~~~~~~~~~
65 | n, sr = sp.loadsound(noise_sample, sr = sr)
66 | ipd.Audio(n,rate=sr)
67 |
68 |
69 | ##########################################################
70 | # Hear Signal-to-Noise Ratio 20
71 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
72 | noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound(
73 | speech_sample,
74 | noise_sample,
75 | sr = sr,
76 | snr = 20)
77 | ipd.Audio(noisyspeech_20snr,rate=sr)
78 |
79 | ########################################################
80 | # `snr20` is simply the measured SNR post adjustment of the noise signal.
81 | # This is useful to check that the indicated snr is at least close
82 | # to the resulting snr.
83 | snr20
84 |
85 | ##########################################################
86 | # Hear Signal-to-Noise Ratio 5
87 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
88 | noisyspeech_5snr, snr5 = sp.dsp.add_backgroundsound(
89 | speech_sample,
90 | noise_sample,
91 | sr = sr,
92 | snr = 5)
93 | ipd.Audio(noisyspeech_5snr,rate=sr)
94 |
95 | #########################################################
96 | snr5
97 |
98 | ######################################################################
99 | # Visualize the Audio Samples
100 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^
101 |
102 | ######################################################################
103 | # See Clean Speech (raw signal)
104 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
105 | sp.plotsound(speech_sample, feature_type='signal',
106 | sr = sr, title = 'Speech: ' + word.upper())
107 |
108 | ######################################################################
109 | # See Clean Speech (stft)
110 | # ~~~~~~~~~~~~~~~~~~~~~~~
111 | sp.plotsound(speech_sample, feature_type='stft',
112 | sr = sr, title = 'Speech: ' + word.upper())
113 |
114 | ###################################################################### See Noise (raw signal)
115 | # ~~~~~~~~~~~~~~~~~~~~~~
116 | sp.plotsound(noise_sample, feature_type='signal',
117 | title = 'Noise: ' + noise.upper())
118 |
119 | ###################################################################### See Noise (stft)
120 | # ~~~~~~~~~~~~~~~~
121 | sp.plotsound(noise_sample, feature_type='stft',
122 | title = 'Noise: ' + noise.upper())
123 |
124 | ######################################################################
125 | # See Noisy Speech: SNR 20 (raw signal)
126 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
127 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal',
128 | title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper()))
129 |
130 | ######################################################################
131 | # See Noisy Speech: SNR 20 (stft)
132 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
133 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'stft',
134 | title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper()))
135 |
136 | ######################################################################
137 | # See Noisy Speech: SNR 5 (raw signal)
138 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
139 | sp.plotsound(noisyspeech_5snr, sr = sr, feature_type = 'signal',
140 | title = '"{}" with {} noise at SNR 5'.format(word.upper(), noise.upper()))
141 |
142 | ######################################################################
143 | # See Noisy Speech: SNR 5 (stft)
144 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
145 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'stft',
146 | title = '"{}" with {} noise at SNR 5'.format(word.upper(), noise.upper()))
147 |
148 | ######################################################################
149 | # Make Combined Sound Longer
150 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^
151 |
152 | ##########################################################
153 | # Pad Speech and Set Total Length
154 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
155 | noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound(
156 | speech_sample,
157 | noise_sample,
158 | sr = sr,
159 | snr = 20,
160 | pad_mainsound_sec = 1,
161 | total_len_sec = 4)
162 |
163 | ##########################################################
164 | ipd.Audio(noisyspeech_20snr,rate=sr)
165 |
166 | ##########################################################
167 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal',
168 | title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper()))
169 |
170 |
171 | ######################################################################
172 | # Make Combined Sound Shorter
173 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^
174 |
175 | ##########################################################
176 | # Set Total Length
177 | # ~~~~~~~~~~~~~~~~
178 | noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound(
179 | speech_sample,
180 | noise_sample,
181 | sr = sr,
182 | snr = 20,
183 | total_len_sec = 0.5)
184 |
185 | ##########################################################
186 | ipd.Audio(noisyspeech_20snr,rate=sr)
187 |
188 | ##########################################################
189 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal',
190 | title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper()))
191 |
192 | ######################################################################
193 | # Wrap the Background Sound
194 | # ^^^^^^^^^^^^^^^^^^^^^^^^^
195 | noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound(
196 | speech_sample,
197 | noise_sample,
198 | sr = sr,
199 | snr = 20,
200 | wrap = True,
201 | pad_mainsound_sec = 2,
202 | total_len_sec = 5)
203 |
204 | ##########################################################
205 | ipd.Audio(noisyspeech_20snr,rate=sr)
206 |
207 | ##########################################################
208 | sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal',
209 | title = '"{}" with {} noise at SNR 20'.format(word.upper(), noise.upper()))
210 |
211 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_dataset_info_formatting.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | ========================================
4 | Audio Dataset Exploration and Formatting
5 | ========================================
6 |
7 | Examine audio files within a dataset, and reformat them if desired.
8 |
9 | To see how soundpy implements this, see `soundpy.builtin.dataset_logger` and
10 | `soundpy.builtin.dataset_formatter`.
11 | """
12 |
13 | #####################################################################
14 | # Let's import soundpy
15 | import soundpy as sp
16 |
17 | ###############################################################################################
18 | #
19 | # Dataset Exploration
20 | # ^^^^^^^^^^^^^^^^^^^
21 |
22 | ##########################################################
23 | # Designate path relevant for accessing audiodata
24 | sp_dir = '../../../'
25 |
26 | ##########################################################
27 | # I will explore files in a small dataset on my computer with varying file formats.
28 | dataset_path = '{}audiodata2/'.format(sp_dir)
29 | dataset_info_dict = sp.builtin.dataset_logger('{}audiodata2/'.format(sp_dir));
30 |
31 | #########################################################################
32 | # This returns our data in a dictionary, perfect for exploring via Pandas
33 | import pandas as pd
34 | all_data = pd.DataFrame(dataset_info_dict).T
35 | all_data.head()
36 |
37 | ###################################
38 | # Let's have a look at the audio files and how uniform they are:
39 | print('formats: ', all_data.format_type.unique())
40 | print('bitdepth (types): ', all_data.bitdepth.unique())
41 | print('mean duration (sec): ', all_data.dur_sec.mean())
42 | print('std dev duration (sec): ', all_data.dur_sec.std())
43 | print('min sample rate: ', all_data.sr.min())
44 | print('max sample rate: ', all_data.sr.max())
45 | print('number of channels: ', all_data.num_channels.unique())
46 |
47 |
48 | ##########################################################
49 | # For a visual example, let's plot the count of various sample rates. (48000 Hz is high definition sound, 16000 Hz is wideband, and 8000 Hz is narrowband, similar to how speech sounds on the telephone.)
50 | all_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts')
51 |
52 | ###############################################################################################
53 | # Reformat a Dataset
54 | # ^^^^^^^^^^^^^^^^^^
55 |
56 | ##############################################################
57 | # Let's say we have a dataset that we want to make consistent.
58 | # We can do that with soundpy
59 | new_dataset_dir = sp.builtin.dataset_formatter(
60 | dataset_path,
61 | recursive = True, # we want all the audio, even in nested directories
62 | format='WAV',
63 | bitdepth = 16, # if set to None, a default bitdepth will be applied
64 | sr = 8000, # narrowband
65 | mono = True, # ensure data all have 1 channel
66 | dur_sec = 3, # audio will be limited to 3 seconds
67 | zeropad = True, # audio shorter than 3 seconds will be zeropadded
68 | new_dir = './example_dir/', # if None, a time-stamped directory will be created for you
69 | overwrite = False # can set to True if you want to overwrite files
70 | );
71 |
72 | ###############################################
73 | # Let's see what the audio data looks like now:
74 | dataset_formatted_dict = sp.builtin.dataset_logger(new_dataset_dir, recursive=True);
75 | formatted_data = pd.DataFrame(dataset_formatted_dict).T
76 |
77 | #####################
78 | formatted_data.head()
79 |
80 | ###################################
81 | print('audio formats: ', formatted_data.format_type.unique())
82 | print('bitdepth (types): ', formatted_data.bitdepth.unique())
83 | print('mean duration (sec): ', formatted_data.dur_sec.mean())
84 | print('std dev duration (sec): ', formatted_data.dur_sec.std())
85 | print('min sample rate: ', formatted_data.sr.min())
86 | print('max sample rate: ', formatted_data.sr.max())
87 | print('number of channels: ', formatted_data.num_channels.unique())
88 |
89 | ##########################################################
90 | # Now all the audio data is sampled at the same rate: 8000 Hz
91 | formatted_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts')
92 |
93 | ###########################################
94 | # There we go!
95 | # You can reformat only parts of the audio files, e.g. format or bitdepth.
96 | # If you leave parameters in sp.builtin.dataset_formatter as None, the original
97 | # settings of the audio file will be maintained, except for bitdepth:
98 | # a default bitdepth will be applied according to the format of the file; see `soundfile.default_subtype`.
99 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_featureprep_denoiser.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =======================================================
4 | Feature Extraction for Denoising: Clean and Noisy Audio
5 | =======================================================
6 |
7 | Extract acoustic features from clean and noisy datasets for
8 | training a denoising model, e.g. a denoising autoencoder.
9 |
10 | To see how soundpy implements this, see `soundpy.builtin.denoiser_feats`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 |
17 | #####################################################################
18 | import soundpy as sp
19 | import IPython.display as ipd
20 |
21 | ######################################################
22 | # Prepare for Extraction: Data Organization
23 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
24 |
25 | ######################################################
26 | # I will use a mini denoising dataset as an example
27 |
28 | # Example noisy data:
29 | data_noisy_dir = '/home/airos/Projects/Data/denoising/uwnu/noisy'
30 | # Example clean data:
31 | data_clean_dir = '/home/airos/Projects/Data/denoising/uwnu/clean/'
32 | # Where to save extracted features:
33 | data_features_dir = './audiodata/example_feats_models/denoiser/'
34 |
35 | ######################################################
36 | # Choose Feature Type
37 | # ~~~~~~~~~~~~~~~~~~~
38 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'.
39 | # If you are working with speech, I suggest 'fbank', 'powspec', or 'stft'.
40 |
41 | feature_type = 'stft'
42 | sr = 22050
43 |
44 | ######################################################
45 | # Set Duration of Audio
46 | # ~~~~~~~~~~~~~~~~~~~~~
47 | # How much audio (in seconds) to use from each audio file.
48 | # The speech samples are about 3 seconds long.
49 | dur_sec = 3
50 |
51 | ######################################################
52 | # Set Context Window / Number of Frames
53 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
54 | # How many sections should each sample be broken into? (optional)
55 | # Some research papers include a 'context window' or the like,
56 | # which this refers to.
57 | frames_per_sample = 11
58 |
59 | #######################################################################
60 | # Option 1: Built-In Functionality: soundpy does everything for you
61 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
62 |
63 | ############################################################
64 | # Define which data to use and which features to extract.
65 | # NOTE: because the dataset is very small, we will set
66 | # `perc_train` to a lower level than 0.8 (otherwise an error will be raised).
67 | # Everything else is based on defaults. A feature folder with
68 | # the feature data will be created in the current working directory.
69 | # (Although, you can set this under the parameter `data_features_dir`)
70 | # `visualize` saves periodic images of the features extracted.
71 | # This is useful if you want to know what's going on during the process.
72 | perc_train = 0.6 # with larger datasets this would be around 0.8
73 | extraction_dir = sp.denoiser_feats(
74 | data_clean_dir = data_clean_dir,
75 | data_noisy_dir = data_noisy_dir,
76 | sr = sr,
77 | feature_type = feature_type,
78 | dur_sec = dur_sec,
79 | frames_per_sample = frames_per_sample,
80 | perc_train = perc_train,
81 | limit = 200,
82 | visualize=True);
83 | extraction_dir
84 |
85 | ################################################################
86 | # The extracted features, extraction settings applied, and
87 | # which audio files were assigned to which datasets
88 | # will be saved in the `extraction_dir` directory
89 |
90 |
91 | ############################################################
92 | # And that's it!
93 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_featureprep_envclassifier.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =====================================
4 | Feature Extraction for Classification
5 | =====================================
6 |
7 | Extract acoustic features from labeled data for
8 | training an environment or speech classifier.
9 |
10 | To see how soundpy implements this, see `soundpy.builtin.envclassifier_feats`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 |
17 |
18 | #####################################################################
19 | import os, sys
20 | import inspect
21 | currentdir = os.path.dirname(os.path.abspath(
22 | inspect.getfile(inspect.currentframe())))
23 | parentdir = os.path.dirname(currentdir)
24 | parparentdir = os.path.dirname(parentdir)
25 | packagedir = os.path.dirname(parparentdir)
26 | sys.path.insert(0, packagedir)
27 |
28 | import matplotlib.pyplot as plt
29 | import soundpy as sp
30 | import IPython.display as ipd
31 | package_dir = '../../../'
32 | os.chdir(package_dir)
33 | sp_dir = package_dir
34 | ######################################################
35 | # Prepare for Extraction: Data Organization
36 | # -----------------------------------------
37 |
38 | ######################################################
39 | # I will use a sample speech commands data set:
40 |
41 | ##########################################################
42 | # Designate path relevant for accessing audiodata
43 | data_dir = '/home/airos/Projects/Data/sound/speech_commands_small_section/'
44 |
45 | ######################################################
46 | # Choose Feature Type
47 | # ~~~~~~~~~~~~~~~~~~~
48 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'.
49 | # If you are working with speech, I suggest 'fbank', 'powspec', or 'stft'.
50 |
51 | feature_type = 'fbank'
52 |
53 | ######################################################
54 | # Set Duration of Audio
55 | # ~~~~~~~~~~~~~~~~~~~~~
56 | # How much audio (in seconds) to use from each audio file.
57 | # The example noise and speech files are only 1 second long
58 | dur_sec = 1
59 |
60 |
61 | #############################################################
62 | # Built-In Functionality - soundpy extracts the features for you
63 | # ----------------------------------------------------------------------------
64 |
65 | ############################################################
66 | # Define which data to use and which features to extract.
67 | # Everything else is based on defaults. A feature folder with
68 | # the feature data will be created in the current working directory.
69 | # (Although, you can set this under the parameter `data_features_dir`)
70 | # `visualize` saves periodic images of the features extracted.
71 | # This is useful if you want to know what's going on during the process.
72 | extraction_dir = sp.envclassifier_feats(data_dir,
73 | feature_type=feature_type,
74 | dur_sec=dur_sec,
75 | visualize=True);
76 |
77 | ################################################################
78 | # The extracted features, extraction settings applied, and
79 | # which audio files were assigned to which datasets
80 | # will be saved in the following directory:
81 | extraction_dir
82 |
83 | ############################################################
84 | # And that's it!
85 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_filter_out_noise.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 | """
4 | ===========================
5 | Filter Out Background Noise
6 | ===========================
7 |
8 | Filter out background noise from noisy speech signals.
9 |
10 | To see how soundpy implements this, see `soundpy.builtin.filtersignal`.
11 |
12 | As a general note for filtering, the Wiener Filter is the default filter for soundpy. It seems to filter signals more consistently than the Band Spectral Subtraction Filter.
13 | """
14 |
15 |
16 | ###############################################################################################
17 | #
18 |
19 |
20 | #####################################################################
21 |
22 | # Let's import soundpy, and ipd for playing audio data
23 | import soundpy as sp
24 | import IPython.display as ipd
25 |
26 |
27 | ######################################################
28 | # Define the noisy and clean speech audio files.
29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
30 | # Note: these files are available in the soundpy repo.
31 | # Designate path relevant for accessing audiodata
32 | sp_dir = '../../../'
33 |
34 | ##########################################################
35 | # Noise sample:
36 | noise = '{}audiodata/background_samples/traffic.wav'.format(sp_dir)
37 | noise = sp.string2pathlib(noise)
38 | speech = '{}audiodata/python.wav'.format(sp_dir)
39 | speech = sp.utils.string2pathlib(speech)
40 |
41 | ##########################################################
42 | # For filtering, we will set the sample rate to be quite high:
43 | sr = 48000
44 |
45 | ##########################################################
46 | # Create a noisy speech signal at SNR 10
47 | noisy, snr_measured = sp.dsp.add_backgroundsound(
48 | speech,
49 | noise,
50 | sr = sr,
51 | snr = 10,
52 | total_len_sec = 3,
53 | pad_mainsound_sec = 0.75)
54 |
55 | ##########################################################
56 | # Hear and see the noisy speech
57 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
58 |
59 | ipd.Audio(noisy,rate=sr)
60 |
61 | ##########################################################
62 | sp.plotsound(noisy, sr=sr, feature_type='signal',
63 | title='Noisy Speech ')
64 |
65 |
66 | ##########################################################
67 | # Hear and see the clean speech
68 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
69 | s, sr = sp.loadsound(speech, sr=sr)
70 | ipd.Audio(s,rate=sr)
71 |
72 | ##########################################################
73 | sp.plotsound(s, sr=sr, feature_type='signal',
74 | title='Clean Speech ')
75 |
76 |
77 | ##########################################################
78 | # Filter the noisy speech
79 | # ^^^^^^^^^^^^^^^^^^^^^^^
80 |
81 | ##########################################################
82 | # Wiener Filter
83 | # ~~~~~~~~~~~~~
84 |
85 | ##########################################################
86 | # Let's filter with a Wiener filter:
87 | noisy_wf, sr = sp.filtersignal(noisy,
88 | sr=sr,
89 | filter_type='wiener') # default
90 |
91 | ##########################################################
92 | ipd.Audio(noisy_wf,rate=sr)
93 |
94 | ##########################################################
95 | sp.plotsound(noisy_wf, sr=sr, feature_type='signal',
96 | title='Noisy Speech: Wiener Filter')
97 |
98 | #################################################################
99 | # Wiener Filter with Postfilter
100 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101 |
102 | ##########################################################
103 | # Let's filter with a Wiener filter and postfilter
104 | noisy_wfpf, sr = sp.filtersignal(noisy,
105 | sr=sr,
106 | filter_type='wiener',
107 | apply_postfilter = True)
108 |
109 | ##########################################################
110 | ipd.Audio(noisy_wfpf,rate=sr)
111 |
112 | ##########################################################
113 | sp.plotsound(noisy_wfpf, sr=sr, feature_type='signal',
114 | title='Noisy Speech: Wiener Filter with Postfilter')
115 |
116 | #################################################################
117 | # Band Spectral Subtraction
118 | # ~~~~~~~~~~~~~~~~~~~~~~~~~
119 |
120 | ##########################################################
121 | # Let's filter using band spectral subtraction
122 | noisy_bs, sr = sp.filtersignal(noisy,
123 | sr=sr,
124 | filter_type='bandspec')
125 |
126 | ##########################################################
127 | ipd.Audio(noisy_bs,rate=sr)
128 |
129 | ##########################################################
130 | sp.plotsound(noisy_bs, sr=sr, feature_type='signal',
131 | title='Noisy Speech: Band Spectral Subtraction')
132 |
133 |
134 | #################################################################
135 | # Band Spectral Subtraction with Postfilter
136 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
137 |
138 | #########################################################################
139 | # Finally, let's filter using band spectral subtraction with a postfilter
140 | noisy_bspf, sr = sp.filtersignal(noisy,
141 | sr=sr,
142 | filter_type='bandspec',
143 | apply_postfilter = True)
144 |
145 | ##########################################################
146 | ipd.Audio(noisy_bspf,rate=sr)
147 |
148 | ##########################################################
149 | sp.plotsound(noisy_bspf, sr=sr, feature_type='signal',
150 | title='Noisy Speech: Band Spectral Subtraction with Postfilter')
151 |
152 |
153 | ##########################################################
154 | # Filter: increase the scale
155 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^
156 |
157 | ##########################################################
158 | # Let's filter with a Wiener filter:
159 | filter_scale = 5
160 | noisy_wf, sr = sp.filtersignal(noisy,
161 | sr=sr,
162 | filter_type='wiener',
163 | filter_scale = filter_scale)
164 |
165 | ##########################################################
166 | # Wiener Filter
167 | # ~~~~~~~~~~~~~
168 |
169 | ##########################################################
170 | ipd.Audio(noisy_wf,rate=sr)
171 |
172 | ##########################################################
173 | sp.plotsound(noisy_wf, sr=sr, feature_type='signal',
174 | title='Noisy Speech: Wiener Filter Scale {}'.format(filter_scale))
175 |
176 | #################################################################
177 | # Wiener Filter with Postfilter
178 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
179 |
180 | ##########################################################
181 | # Let's filter with a Wiener filter and postfilter
182 | noisy_wfpf, sr = sp.filtersignal(noisy,
183 | sr=sr,
184 | filter_type='wiener',
185 | apply_postfilter = True,
186 | filter_scale = filter_scale)
187 |
188 | ##########################################################
189 | ipd.Audio(noisy_wfpf,rate=sr)
190 |
191 | ##########################################################
192 | sp.plotsound(noisy_wfpf, sr=sr, feature_type='signal',
193 | title='Noisy Speech: Wiener Filter with Postfilter Scale {}'.format(filter_scale))
194 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_implement_denoiser.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =================================
4 | Implement a Denoising Autoencoder
5 | =================================
6 |
7 | Implement denoising autoencoder to denoise a noisy speech signal.
8 |
9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_run`.
10 | """
11 |
12 |
13 | ############################################################################################
14 | #
15 |
16 | #####################################################################
17 | # Let's import soundpy and other packages
18 | import soundpy as sp
19 | import numpy as np
20 | # for playing audio in this notebook:
21 | import IPython.display as ipd
22 |
23 | #####################################################################
24 | # As well as the deep learning component of soundpy
25 | from soundpy import models as spdl
26 |
27 | ######################################################
28 | # Prepare for Implementation: Data Organization
29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
30 |
31 | ##########################################################
32 | # Set path relevant for audio data for this example
33 | sp_dir = '../../../'
34 |
35 | ######################################################
36 | # Set model pathway
37 | # ~~~~~~~~~~~~~~~~~
38 | # Currently, this expects a model saved with weights, with a .h5 extension.
39 | # (See `model` below)
40 |
41 | ######################################################
42 | # The soundpy repo offers a pre-trained denoiser, which we'll use.
43 | model = '{}audiodata/models/'.format(sp_dir)+\
44 | 'denoiser/example_denoiser_stft.h5'
45 | # ensure it is a pathlib.PosixPath object
46 | print(model)
47 | model = sp.utils.string2pathlib(model)
48 | model_dir = model.parent
49 |
50 | #########################################################
51 | # What is in this folder?
52 | files = list(model_dir.glob('*.*'))
53 | for f in files:
54 | print(f.name)
55 |
56 | ######################################################
57 | # Provide dictionary with feature extraction settings
58 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 |
60 | #########################################################
61 | # If soundpy extracts features for you, a 'log_extraction_settings.csv'
62 | # file will be saved, which includes relevant feature settings for implementing
63 | # the model; see `soundpy.feats.save_features_datasets`
64 | feat_settings = sp.utils.load_dict(
65 | model_dir.joinpath('log_extraction_settings.csv'))
66 | for key, value in feat_settings.items():
67 | print(key, ' --> ', value)
68 | # convert values that were loaded as strings back to their original types
69 | import ast
70 | try:
71 | feat_settings[key] = ast.literal_eval(value)
72 | except ValueError:
73 | pass
74 | except SyntaxError:
75 | pass
76 |
77 | #########################################################
78 | # For the purposes of plotting, let's use some of the settings defined:
79 | feature_type = feat_settings['feature_type']
80 | sr = feat_settings['sr']
81 |
82 | ######################################################
83 | # Provide new audio for the denoiser to denoise!
84 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
85 |
86 | #########################################################
87 | # We'll use sample speech from the soundpy repo:
88 | speech = sp.string2pathlib('{}audiodata/python.wav'.format(sp_dir))
89 | s, sr = sp.loadsound(speech, sr=sr)
90 |
91 | #########################################################
92 | # Let's add some white noise (10 SNR)
93 | s_n = sp.augment.add_white_noise(s, sr=sr, snr=10)
94 |
95 | ##############################################################
96 | # What does the noisy audio sound like?
97 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
98 | ipd.Audio(s_n,rate=sr)
99 |
100 | ##############################################################
101 | # What does the noisy audio look like?
102 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
103 | sp.plotsound(s_n, sr = sr, feature_type='signal')
104 |
105 | ##############################################################
106 | # What does the clean audio sound like?
107 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
108 | ipd.Audio(s,rate=sr)
109 |
110 | ##############################################################
111 | # What does the clean audio look like?
112 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
113 | sp.plotsound(s, sr = sr, feature_type='signal')
114 |
115 | #########################################################################
116 | # Built-In Denoiser Functionality
117 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
118 |
119 | ##############################################################
120 | # We just need to feed the model path, the noisy audio samples, and
121 | # the feature settings dictionary we looked at above.
122 | y, sr = spdl.denoiser_run(model, s_n, feat_settings)
123 |
124 | ##########################################################
125 | # How does the output sound?
126 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~
127 | ipd.Audio(y,rate=sr)
128 |
129 | ##########################################################
130 | # How does the output look?
131 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
132 | sp.plotsound(y, sr=sr, feature_type = 'signal')
133 |
134 | ##########################################################
135 | # How do the features compare?
136 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
137 |
138 | ##########################################################
139 | # STFT features of the noisy input speech:
140 | sp.plotsound(s_n, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
141 | title = 'Noisy input: STFT features')
142 |
143 | ##########################################################
144 | # STFT features of the output
145 | sp.plotsound(y, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
146 | title = 'Denoiser Output: STFT features')
147 |
148 | ##########################################################
149 | # STFT features of the clean version of the audio:
150 | sp.plotsound(s, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
151 | title = 'Clean "target" audio: STFT features')
152 |
153 |
154 | ##########################################################
155 | # It's not perfect but for a pretty simple implementation, the noise is gone
156 | # and you can hear the person speaking. Pretty cool!
157 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_signals_and_features.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =======================
4 | Create and Plot Signals
5 | =======================
6 |
7 | Create and plot signals / noise; combine them at a specific SNR.
8 |
9 | To see how soundpy implements this, see `soundpy.dsp.generate_sound`,
10 | `soundpy.dsp.generate_noise` and `soundpy.dsp.add_backgroundsound`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 |
17 | #####################################################################
18 | # Let's import soundpy
19 | import soundpy as sp
20 |
21 | ###########################################################################
22 | # Create a Signal
23 | # ^^^^^^^^^^^^^^^
24 |
25 | ########################################################################
26 | # First let's set what sample rate we want to use
27 | sr = 44100
28 |
29 |
30 | #########################################################################
31 | # Let's create a signal of 10 Hz
32 | sig1_hz = 10
33 | sig1, sr = sp.generate_sound(freq=sig1_hz, amplitude = 0.4, sr=sr, dur_sec=1)
34 | sp.plotsound(sig1, sr=sr, feature_type = 'signal',
35 | title = 'Signal: {} Hz'.format(sig1_hz))
36 |
37 |
38 | #########################################################################
39 | # Let's create a signal of 20 Hz
40 | sig2_hz = 20
41 | sig2, sr = sp.generate_sound(freq=sig2_hz, amplitude= 0.4, sr=sr, dur_sec=1)
42 | sp.plotsound(sig2, sr=sr, feature_type = 'signal',
43 | title = 'Signal: {} Hz'.format(sig2_hz))
44 |
45 | ###########################################################################
46 | # Combine Signals
47 | # ^^^^^^^^^^^^^^^
48 |
49 |
50 | #########################################################################
51 | # Add them together and see what they look like:
52 | sig3 = sig1 + sig2
53 | sp.plotsound(sig3, sr=sr, feature_type = 'signal',
54 | title='Mixed Signals: {} Hz + {} Hz'.format(sig1_hz, sig2_hz))
55 |
56 |
57 | ##########################################################################
58 | # Generate Pseudo-Random Noise
59 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
60 |
61 |
62 | #########################################################################
63 | # Create noise to add to the signal:
64 | noise = sp.generate_noise(len(sig3), amplitude=0.025, random_seed=40)
65 | sp.plotsound(noise, sr=sr, feature_type = 'signal',
66 | title='Random Noise')
67 |
68 | ###########################################################################
69 | # Control SNR: Adding a Background Sound
70 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
71 |
72 | #########################################################################
73 | # Add noise at signal-to-noise ratio of 40
74 | sig_noisy, snr = sp.dsp.add_backgroundsound(
75 | audio_main = sig3,
76 | audio_background = noise,
77 | sr = sr,
78 | snr = 40,
79 | clip_at_zero = False)
80 |
81 | # keep energy between 1 and -1
82 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
83 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 40 SNR')
84 |
85 | #########################################################################
86 | # Add noise at signal-to-noise ratio of 20
87 | sig_noisy, snr = sp.dsp.add_backgroundsound(
88 | audio_main = sig3,
89 | audio_background = noise,
90 | sr = sr,
91 | snr = 20)
92 | # keep energy between 1 and -1
93 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
94 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 20 SNR')
95 |
96 | #########################################################################
97 | # Add noise at signal-to-noise ratio of 10
98 | sig_noisy, snr = sp.dsp.add_backgroundsound(
99 | audio_main = sig3,
100 | audio_background = noise,
101 | sr = sr,
102 | snr = 10)
103 | # keep energy between 1 and -1
104 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
105 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 10 SNR')
106 |
107 | #########################################################################
108 | # Add noise at signal-to-noise ratio of 0
109 | sig_noisy, snr = sp.dsp.add_backgroundsound(
110 | audio_main = sig3,
111 | audio_background = noise,
112 | sr = sr,
113 | snr = 0)
114 | # keep energy between 1 and -1
115 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
116 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 0 SNR')
117 |
118 |
119 | #########################################################################
120 | # Add noise at signal-to-noise ratio of -10
121 | sig_noisy, snr = sp.dsp.add_backgroundsound(
122 | audio_main = sig3,
123 | audio_background = noise,
124 | sr = sr,
125 | snr = -10)
126 | # keep energy between 1 and -1
127 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
128 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: -10 SNR')
129 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_train_classifier.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | ============================
4 | Train an Acoustic Classifier
5 | ============================
6 |
7 | Train an acoustic classifier on speech or noise features.
8 |
9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_train`.
10 | """
11 |
12 | ###############################################################################################
13 | #
14 |
15 | #####################################################################
16 | # Let's import soundpy for handling sound
17 | import soundpy as sp
18 | #####################################################################
19 | # As well as the deep learning component of soundpy
20 | from soundpy import models as spdl
21 |
22 |
23 | ######################################################
24 | # Prepare for Training: Data Organization
25 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
26 |
27 | ##########################################################
28 | # Set path relevant for audio data for this example
29 | sp_dir = '../../../'
30 |
31 | ######################################################
32 | # I will load previously extracted features (from the Speech Commands Dataset)
33 | # See `soundpy.feats.save_features_datasets` or `soundpy.builtin.envclassifier_feats`
34 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\
35 | 'envclassifier/example_feats_fbank/'
36 |
37 | #########################################################
38 | # What is in this folder?
39 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)
40 | files = list(feature_extraction_dir.glob('*.*'))
41 | for f in files:
42 | print(f.name)
43 |
44 | #########################################################
45 | # The .npy files contain the features themselves, in train, validation, and
46 | # test datasets:
47 | files = list(feature_extraction_dir.glob('*.npy'))
48 | for f in files:
49 | print(f.name)
50 |
51 | #########################################################
52 | # The .csv files contain information about how the features were extracted
53 | files = list(feature_extraction_dir.glob('*.csv'))
54 | for f in files:
55 | print(f.name)
56 |
57 | #########################################################
58 | # We'll have a look at which features were extracted and other settings:
59 | feat_settings = sp.utils.load_dict(
60 | feature_extraction_dir.joinpath('log_extraction_settings.csv'))
61 | for key, value in feat_settings.items():
62 | print(key, ' --> ', value)
63 |
64 | #########################################################
65 | # For more about these settings, see `soundpy.feats.save_features_datasets`.
66 |
67 | #########################################################
68 | # We'll have a look at the audio files that were assigned
69 | # to the train, val, and test datasets.
70 | audio_datasets = sp.utils.load_dict(
71 | feature_extraction_dir.joinpath('dataset_audiofiles.csv'))
72 | count = 0
73 | for key, value in audio_datasets.items():
74 | print(key, ' --> ', value)
75 | count += 1
76 | if count > 5:
77 | break
78 |
79 | #############################################################
80 | # Built-In Functionality: soundpy does everything for you
81 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
82 | # For more about this function, see `soundpy.models.builtin.envclassifier_train`.
83 |
84 | #############################################################
85 | model_dir, history = spdl.envclassifier_train(
86 | feature_extraction_dir = feature_extraction_dir,
87 | epochs = 50,
88 | patience = 30)
89 |
90 | #############################################################
91 | # Where the model and logs are located:
92 | model_dir
93 |
94 | #############################################################
95 | # Let's plot how the model performed (on this mini dataset)
96 | import matplotlib.pyplot as plt
97 | plt.clf()
98 | plt.plot(history.history['accuracy'])
99 | plt.plot(history.history['val_accuracy'])
100 | plt.title('model accuracy')
101 | plt.ylabel('accuracy')
102 | plt.xlabel('epoch')
103 | plt.legend(['train', 'val'], loc='upper right')
104 | plt.show()
105 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/examples/plot_train_denoiser.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =============================
4 | Train a Denoising Autoencoder
5 | =============================
6 |
7 | Train a denoising autoencoder with clean and noisy acoustic features.
8 |
9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_train`,
10 | `soundpy.builtin.denoiser_feats` and `soundpy.builtin.create_denoise_data`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 |
17 |
18 | #####################################################################
19 | # Let's import soundpy for handling sound
20 | import soundpy as sp
21 | #####################################################################
22 | # As well as the deep learning component of soundpy
23 | from soundpy import models as spdl
24 |
25 |
26 | ######################################################
27 | # Prepare for Training: Data Organization
28 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
29 |
30 | ##########################################################
31 | # Designate path relevant for accessing audiodata
32 | sp_dir = '../../../'
33 |
34 |
35 | ######################################################
36 | # I will load previously extracted features (sample data), see `soundpy.feats.save_features_datasets` or `soundpy.builtin.denoiser_feats`
37 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\
38 | 'denoiser/example_feats_fbank/'
39 |
40 | #########################################################
41 | # What is in this folder?
42 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)
43 | files = list(feature_extraction_dir.glob('*.*'))
44 | for f in files:
45 | print(f.name)
46 |
47 | #########################################################
48 | # The .npy files contain the features themselves, in train, validation, and
49 | # test datasets:
50 | files = list(feature_extraction_dir.glob('*.npy'))
51 | for f in files:
52 | print(f.name)
53 |
54 | #########################################################
55 | # The .csv files contain information about how the features were extracted
56 | files = list(feature_extraction_dir.glob('*.csv'))
57 | for f in files:
58 | print(f.name)
59 |
60 | #########################################################
61 | # We'll have a look at which features were extracted and other settings:
62 | feat_settings = sp.utils.load_dict(
63 | feature_extraction_dir.joinpath('log_extraction_settings.csv'))
64 | for key, value in feat_settings.items():
65 | print(key, ' ---> ', value)
66 |
67 | #########################################################
68 | # For more about these settings, see `soundpy.feats.save_features_datasets`.
69 |
70 | #########################################################
71 | # We'll have a look at the audio files that were assigned
72 | # to the train, val, and test datasets.
73 | audio_datasets = sp.utils.load_dict(
74 | feature_extraction_dir.joinpath('audiofiles_datasets_clean.csv'))
75 | count = 0
76 | for key, value in audio_datasets.items():
77 | print(key, ' ---> ', value)
78 | count += 1
79 | if count > 5:
80 | break
81 |
82 | #############################################################
83 | # Built-In Functionality: soundpy does everything for you
84 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
85 | # For more about this, see `soundpy.models.builtin.denoiser_train`.
86 |
87 | #############################################################
88 | model_dir, history = spdl.denoiser_train(
89 | feature_extraction_dir = feature_extraction_dir,
90 | epochs = 50)
91 |
92 | #########################################################
93 |
94 |
95 | #############################################################
96 | # Where the model and logs are located:
97 | model_dir
98 |
99 |
100 | #############################################################
101 | # Let's plot how the model performed (on this mini dataset)
102 |
103 | import matplotlib.pyplot as plt
104 | plt.plot(history.history['loss'])
105 | plt.plot(history.history['val_loss'])
106 | plt.title('model loss')
107 | plt.ylabel('loss')
108 | plt.xlabel('epoch')
109 | plt.legend(['train', 'val'], loc='upper right')
110 | plt.show()
111 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/exceptions.rst:
--------------------------------------------------------------------------------
1 |
2 | Customized Errors
3 | -----------------
4 |
5 | .. automodule:: soundpy.exceptions
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/feats.rst:
--------------------------------------------------------------------------------
1 |
2 | Extract and manipulate audio features
3 | -------------------------------------
4 |
5 | .. automodule:: soundpy.feats
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/files.rst:
--------------------------------------------------------------------------------
1 |
2 | Working with audio files
3 | ------------------------
4 |
5 | .. automodule:: soundpy.files
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/filters.rst:
--------------------------------------------------------------------------------
1 |
2 | Filters: Wiener and Band Spectral Subtraction
3 | ---------------------------------------------
4 |
5 | .. automodule:: soundpy.filters
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
10 | .. autoclass:: soundpy.filters.FilterSettings
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | .. automethod:: __init__
16 |
17 | .. autoclass:: soundpy.filters.Filter
18 | :members:
19 | :undoc-members:
20 | :show-inheritance:
21 |
22 | .. automethod:: __init__
23 |
24 |
25 | .. autoclass:: soundpy.filters.WienerFilter
26 | :members:
27 | :undoc-members:
28 | :show-inheritance:
29 |
30 | .. automethod:: __init__
31 |
32 |
33 | .. autoclass:: soundpy.filters.BandSubtraction
34 | :members:
35 | :undoc-members:
36 | :show-inheritance:
37 |
38 | .. automethod:: __init__
39 |
40 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/index.rst:
--------------------------------------------------------------------------------
1 | .. SoundPy documentation master file, created by
2 | sphinx-quickstart on Mon Jun 15 11:57:18 2020.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | SoundPy v0.1.0a2
7 | ====================
8 |
9 | Welcome to the docs!
10 | --------------------
11 |
12 | SoundPy is a research based Python package_ for exploring and experimenting with sound and deep learning.
13 |
14 | Those who might find this useful:
15 |
16 | * speech and sound enthusiasts
17 | * digital signal processing / mathematics / physics / acoustics enthusiasts
18 | * deep learning enthusiasts
19 | * researchers
20 | * linguists
21 | * psycholinguists
22 |
23 | The main goal of SoundPy is to provide the code and functionality with more context via visualization, research, and mathematics. Most of the resources used to build the functionality stem from publicly available research and datasets.
24 |
25 | As it covers quite a large range, from audio file conversion to implementation of trained neural networks, the purpose of SoundPy is not to be the perfect implementation of all functions (although that is also a goal :P ), but rather a peek into how they *can* be implemented, hopefully offering people a foundation for trying out different ways of implementation (feature extraction, building neural networks, etc.).
26 |
27 | This project is still in the beginning stages and has a lot of room for growth, especially with contributors having a background / knowledge in data science, computer science, machine and deep learning, physics, acoustics, or dsp. Contributors from other backgrounds are also welcome! If you'd like SoundPy to do something it doesn't, try making it or create an issue.
28 |
29 | .. _package: https://github.com/a-n-rose/Python-Sound-Tool/tree/master
30 |
31 | .. toctree::
32 | :maxdepth: 2
33 |
34 | example_cases.rst
35 | readme.rst
36 |
37 |
38 | .. toctree::
39 | :maxdepth: 1
40 |
41 | changelog.rst
42 |
43 | * :ref:`genindex`
44 | * :ref:`modindex`
45 | * :ref:`search`
46 |
47 | :Author:
48 | Aislyn Rose
49 |
50 | rose.aislyn.noelle@gmail.com
51 |
52 | webpage_
53 |
54 | github_
55 |
56 | .. _webpage: https://a-n-rose.github.io/
57 |
58 | .. _github : https://github.com/a-n-rose
59 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/model_dataprep.rst:
--------------------------------------------------------------------------------
1 |
2 | Feeding large datasets to models
3 | --------------------------------
4 |
5 | .. autoclass:: soundpy.models.dataprep.Generator
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
10 | .. automethod:: __init__
11 |
12 |
13 | .. automodule:: soundpy.models.dataprep
14 | :members:
15 | :undoc-members:
16 | :show-inheritance:
17 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/modelsetup.rst:
--------------------------------------------------------------------------------
1 |
2 | Additional model setup (e.g. Early Stopping)
3 | --------------------------------------------
4 |
5 | .. automodule:: soundpy.models.modelsetup
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/modules.rst:
--------------------------------------------------------------------------------
1 | =========================
2 | SoundPy Functionality
3 | =========================
4 |
5 | .. include:: builtin_sp.rst
6 |
7 | .. include:: builtin_spdl.rst
8 |
9 | .. include:: augment.rst
10 |
11 | .. include:: files.rst
12 |
13 | .. include:: datasets.rst
14 |
15 | .. include:: dsp.rst
16 |
17 | .. include:: filters.rst
18 |
19 | .. include:: feats.rst
20 |
21 | .. include:: template_models.rst
22 |
23 | .. include:: modelsetup.rst
24 |
25 | .. include:: model_dataprep.rst
26 |
27 | .. include:: utils.rst
28 |
29 | .. include:: exceptions.rst
30 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/readme.rst:
--------------------------------------------------------------------------------
1 | .. include:: modules.rst
2 |
3 |
4 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/template_models.rst:
--------------------------------------------------------------------------------
1 | Template deep neural networks
2 | -----------------------------
3 |
4 | .. automodule:: soundpy.models.template_models
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a2/utils.rst:
--------------------------------------------------------------------------------
1 |
2 | Other useful non-specific functionality
3 | ---------------------------------------
4 |
5 | .. automodule:: soundpy.utils
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/augment.rst:
--------------------------------------------------------------------------------
1 |
2 | Augment audio data
3 | ------------------
4 |
5 | .. automodule:: soundpy.augment
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/builtin_sp.rst:
--------------------------------------------------------------------------------
1 |
2 | Built-In Functionality (non Deep Learning)
3 | ------------------------------------------
4 |
5 | .. automodule:: soundpy.builtin
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/builtin_spdl.rst:
--------------------------------------------------------------------------------
1 |
2 | Built-In Functionality (Deep Learning)
3 | --------------------------------------
4 |
5 | .. automodule:: soundpy.models.builtin
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/changelog.rst:
--------------------------------------------------------------------------------
1 | *********
2 | Changelog
3 | *********
4 |
5 | v0.1.0a
6 | =======
7 |
8 |
9 | v0.1.0a3
10 | --------
11 | 2021-04-09
12 |
13 | Bug fixes
14 | - no longer use Librosa for feature extraction: allow easier implementation of augmentations, especially during training.
15 | - `soundpy.feats.plot` now uses parameter `subprocess` to allow for different backends to be applied, depending on when the function is called. For example, if plotting from within a Generator while training, `subprocess` should be set to True, and the 'Agg' backend will be applied. Otherwise, 'TkAgg' backend is used. Fixes issues with multi-threading.
16 | - Fixed generator and Tensorflow issue: with Tensorflow 2.2.0+ the models in `soundpy.models.builtin` that were trained via generator failed. Use `tensorflow.data.Dataset.from_generator` to feed generator data to models.
17 | - Improved `clip_at_zero`.
18 |
19 | Features
20 | - Python 3.8 can now be used.
21 | - throw deprecation warning for parameters `context_window` or `frames_per_sample` as these "features" will be removed from feature extraction. Rather the features can be reshaped post feature extraction.
22 | - added `timestep`, `axis_timestep`, `context_window`, `axis_context_window` and `combine_axes_0_1` parameters to `soundpy.models.Generator`: allow more control over shape of the features.
23 | - can run `soundpy.models.builtin.envclassifier_extract_train` to run with pre-extracted val and test features.
24 | - `soundpy.feats.plotsound`, `soundpy.feats.plot_vad` and `soundpy.feats.plot_dom_freq` all can plot stereo sound: for each channel in a stereo signal, a plot is either generated or saved. If a filename already exists, a date stamp is added to filename to avoid overwriting images.
25 | - allow `grayscale2color` to be applied to 2D data.
26 |
27 | Breaking changes
28 | - `soundpy.models.Generator` uses parameter `normalize` instead of `normalized`. Found this to be more intuitive. If `normalize` is set to True, data will be normalized. Before, if `normalized` was set to True, data would not be normalized.
29 | - removed `add_tensor_last` and `add_tensor_first`: require adding of tensors (for keras) to be included in parameter `desired_input_shape`.
30 |
31 | Other changes
32 | - CPU soundpy can use Tensorflow 2.1.0, 2.2.0 and 2.3.0. Dockerfile still uses Tensorflow 2.1.0 as it is still compatible with updated code.
33 | - `soundpy.models.builtin.implement_denoiser` raises warning if cleaned features cannot be converted to raw audio samples.
34 |
35 |
36 | v0.1.0a2
37 | --------
38 | 2020-08-13
39 |
40 |
41 | Bug fixes
42 | - added `use_beg_ms` parameter in `soundpy.dsp.vad`: improved VAD recognition of silences post speech.
43 |
44 | Features
45 | - added GPU option: provide instructions and Docker image for running SoundPy with GPU
46 | - added `extend_window_ms` parameter to `soundpy.feats.get_vad_samples` and `soundpy.feats.get_vad_stft`: can extend VAD window if desired. Useful in higher SNR environments.
47 | - added `soundpy.feats.get_samples_clipped` and `soundpy.feats.get_stft_clipped` to clip off beginning and ending silences.
48 | - added `beg_end_clipped` parameter to `soundpy.feats.plot_vad` to visualize VAD by clipping the beginning and ending silences (if True) or VAD instances throughout the signal (if False).
49 | - added `soundpy.models.dataprep.GeneratorFeatExtraction` class for extracting and augmenting features during training (still experimental).
50 | - added `soundpy.models.builtin.envclassifier_extract_train` as an example of extracting and augmenting features during training (still experimental).
51 | - added `soundpy.dsp.clip_at_zero` to enable smoother concatenations of signals and enables removal of clicks at beginning and ending of signals.
52 | - added `soundpy.dsp.remove_dc_bias` to enable smoother concatenations of signals
53 | - added and set `remove_dc` parameter to True in `soundpy.files.loadsound` and `soundpy.files.savesound` to ensure signals all have mean zero.
54 | - added `mirror_sound` option to `soundpy.dsp.apply_sample_length` as a way to extend sound.
55 | - added `soundpy.dsp.ismono` to check if samples were mono or stereo.
56 | - added `soundpy.dsp.average_channels` to average sample amplitudes across channels, e.g. to identify where high energy begins / ends in the signal without disregarding additional channels (if stereo sound).
57 | - added `soundpy.dsp.add_channels` for adding additional channels if needed (e.g. for applying a 'hann' or 'hamming' window to stereo sound)
58 | - added stereo sound functionality to `soundpy.dsp.add_backgroundsound`, `soundpy.dsp.clip_at_zero`, `soundpy.dsp.calc_fft`, `soundpy.feats.get_stft`, `soundpy.feats.get_vad_stft`
59 |
60 |
61 | Other changes
62 | - name change: from pysoundtool to soundpy: simpler
63 | - updated dependencies to newest versions still compatible with Tensorflow 2.1.0
64 | - moved `soundpy.dsp.get_vad_samples` to `soundpy.feats.get_vad_samples`
65 | - moved `soundpy.dsp.get_vad_stft` to `soundpy.feats.get_vad_stft`
66 | - name change: allow `soundpy.feats.normalize` to be used as `soundpy.normalize`
67 | - removed `pysoundtool_online` and mybinder button as maintaining the online version was not easily done. Aim to reimplement at some point.
68 |
69 |
70 |
71 | v0.1.0a1
72 | ========
73 |
74 | Initial public alpha release.
75 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/datasets.rst:
--------------------------------------------------------------------------------
1 |
2 | Organizing datasets
3 | -------------------
4 |
5 | .. automodule:: soundpy.datasets
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/dsp.rst:
--------------------------------------------------------------------------------
1 |
2 | Working with signals
3 | --------------------
4 |
5 | .. automodule:: soundpy.dsp
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/example_cases.rst:
--------------------------------------------------------------------------------
1 |
2 | .. toctree::
3 | :maxdepth: 2
4 |
5 | .. include:: auto_examples/index.rst
6 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/README.txt:
--------------------------------------------------------------------------------
1 |
2 | -----------------------------
3 | SoundPy Example Use Cases
4 | -----------------------------
5 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_dataset_info_formatting.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | ========================================
4 | Audio Dataset Exploration and Formatting
5 | ========================================
6 |
7 | Examine audio files within a dataset, and reformat them if desired.
8 |
9 | To see how soundpy implements this, see `soundpy.builtin.dataset_logger` and
10 | `soundpy.builtin.dataset_formatter`.
11 | """
12 |
13 | #####################################################################
14 | # Let's import soundpy
15 | import soundpy as sp
16 |
17 | ###############################################################################################
18 | #
19 | # Dataset Exploration
20 | # ^^^^^^^^^^^^^^^^^^^
21 |
22 | ##########################################################
23 | # Designate path relevant for accessing audiodata
24 | sp_dir = '../../../'
25 |
26 | ##########################################################
27 | # I will explore files in a small dataset on my computer with varying file formats.
28 | dataset_path = '{}audiodata2/'.format(sp_dir)
29 | dataset_info_dict = sp.builtin.dataset_logger('{}audiodata2/'.format(sp_dir));
30 |
31 | #########################################################################
32 | # This returns our data in a dictionary, perfect for exploring via Pandas
33 | import pandas as pd
34 | all_data = pd.DataFrame(dataset_info_dict).T
35 | all_data.head()
36 |
37 | ###################################
38 | # Let's have a look at the audio files and how uniform they are:
39 | print('formats: ', all_data.format_type.unique())
40 | print('bitdepth (types): ', all_data.bitdepth.unique())
41 | print('mean duration (sec): ', all_data.dur_sec.mean())
42 | print('std dev duration (sec): ', all_data.dur_sec.std())
43 | print('min sample rate: ', all_data.sr.min())
44 | print('max sample rate: ', all_data.sr.max())
45 | print('number of channels: ', all_data.num_channels.unique())
46 |
47 |
48 | ##########################################################
49 | # For a visual example, let's plot the count of various sample rates. (48000 Hz is high definition sound, 16000 Hz is wideband, and 8000 Hz is narrowband, similar to how speech sounds on the telephone.)
50 | all_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts')
51 |
52 | ###############################################################################################
53 | # Reformat a Dataset
54 | # ^^^^^^^^^^^^^^^^^^
55 |
56 | ##############################################################
57 | # Let's say we have a dataset that we want to make consistent.
58 | # We can do that with soundpy
59 | new_dataset_dir = sp.builtin.dataset_formatter(
60 | dataset_path,
61 | recursive = True, # we want all the audio, even in nested directories
62 | format='WAV',
63 | bitdepth = 16, # if set to None, a default bitdepth will be applied
64 | sr = 16000, # wideband
65 | mono = True, # ensure data all have 1 channel
66 | dur_sec = 3, # audio will be limited to 3 seconds
67 | zeropad = True, # audio shorter than 3 seconds will be zeropadded
68 | new_dir = './example_dir/', # if None, a time-stamped directory will be created for you
69 | overwrite = False # can set to True if you want to overwrite files
70 | );
71 |
72 | ###############################################
73 | # Let's see what the audio data looks like now:
74 | dataset_formatted_dict = sp.builtin.dataset_logger(new_dataset_dir, recursive=True);
75 | formatted_data = pd.DataFrame(dataset_formatted_dict).T
76 |
77 | #####################
78 | formatted_data.head()
79 |
80 | ###################################
81 | print('audio formats: ', formatted_data.format_type.unique())
82 | print('bitdepth (types): ', formatted_data.bitdepth.unique())
83 | print('mean duration (sec): ', formatted_data.dur_sec.mean())
84 | print('std dev duration (sec): ', formatted_data.dur_sec.std())
85 | print('min sample rate: ', formatted_data.sr.min())
86 | print('max sample rate: ', formatted_data.sr.max())
87 | print('number of channels: ', formatted_data.num_channels.unique())
88 |
89 | ##########################################################
90 | # Now all the audio data is sampled at the same rate: 16000 Hz
91 | formatted_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts')
92 |
93 | ###########################################
94 | # There we go!
95 | # You can reformat only parts of the audio files, e.g. format or bitdepth.
96 | # If you leave parameters in sp.builtin.dataset_formatter as None, the original
97 | # settings of the audio file will be maintained (except for bitdepth:
98 | # a default bitdepth will be applied according to the format of the file); see `soundfile.default_subtype`.
99 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_extract_augment_train_classifier.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | ==================================================
4 | Extract, Augment, and Train an Acoustic Classifier
5 | ==================================================
6 |
7 | Extract and augment features as an acoustic classifier is trained on speech.
8 |
9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_extract_train`.
10 | """
11 |
12 | ###############################################################################################
13 | #
14 |
15 | import os, sys
16 | import inspect
17 | currentdir = os.path.dirname(os.path.abspath(
18 | inspect.getfile(inspect.currentframe())))
19 | parentdir = os.path.dirname(currentdir)
20 | parparentdir = os.path.dirname(parentdir)
21 | packagedir = os.path.dirname(parparentdir)
22 | sys.path.insert(0, packagedir)
23 |
24 | import matplotlib.pyplot as plt
25 | import IPython.display as ipd
26 | package_dir = '../../../'
27 | os.chdir(package_dir)
28 | sp_dir = package_dir
29 |
30 |
31 | #####################################################################
32 | # Let's import soundpy for handling sound
33 | import soundpy as sp
34 | #####################################################################
35 | # As well as the deep learning component of soundpy
36 | from soundpy import models as spdl
37 |
38 |
39 | ######################################################
40 | # Prepare for Training: Data Organization
41 | # =======================================
42 |
43 | ######################################################
44 | # I will use a sample speech commands data set:
45 |
46 | ##########################################################
47 | # Designate path relevant for accessing audiodata
48 | data_dir = '{}../mini-audio-datasets/speech_commands/'.format(sp_dir)
49 |
50 |
51 | ######################################################
52 | # Setup a Feature Settings Dictionary
53 | # -----------------------------------
54 |
55 |
56 | feature_type = 'fbank'
57 | num_filters = 40
58 | rate_of_change = False
59 | rate_of_acceleration = False
60 | dur_sec = 1
61 | win_size_ms = 25
62 | percent_overlap = 0.5
63 | sr = 22050
64 | fft_bins = None
65 | num_mfcc = None
66 | real_signal = True
67 |
68 | get_feats_kwargs = dict(feature_type = feature_type,
69 | sr = sr,
70 | dur_sec = dur_sec,
71 | win_size_ms = win_size_ms,
72 | percent_overlap = percent_overlap,
73 | fft_bins = fft_bins,
74 | num_filters = num_filters,
75 | num_mfcc = num_mfcc,
76 | rate_of_change = rate_of_change,
77 | rate_of_acceleration = rate_of_acceleration,
78 | real_signal = real_signal)
79 |
80 | ######################################################
81 | # Setup an Augmentation Dictionary
82 | # --------------------------------
83 | # This will apply augmentations at random at each epoch.
84 | augmentation_all = dict([('add_white_noise',True),
85 | ('speed_decrease', True),
86 | ('speed_increase', True),
87 | ('pitch_decrease', True),
88 | ('pitch_increase', True),
89 | ('harmonic_distortion', True),
90 | ('vtlp', True)
91 | ])
92 |
93 | ##########################################################
94 | # see the default values for these augmentations
95 | augment_settings_dict = {}
96 | for key in augmentation_all.keys():
97 | augment_settings_dict[key] = sp.augment.get_augmentation_settings_dict(key)
98 | for key, value in augment_settings_dict.items():
99 | print(key, ' : ', value)
100 |
101 | ##########################################################
102 | # Adjust Augmentation Defaults
103 | # ----------------------------
104 |
105 |
106 | ##########################################################
107 | # Adjust Add White Noise
108 | # ~~~~~~~~~~~~~~~~~~~~~~
109 | # I want the SNR of the white noise to vary between several values:
110 | # SNR 10, 15, and 20.
111 | augment_settings_dict['add_white_noise']['snr'] = [10,15,20]
112 |
113 | ##########################################################
114 | # Adjust Pitch Decrease
115 | # ~~~~~~~~~~~~~~~~~~~~~
116 | # I found the pitch changes too exaggerated, so I will
117 | # set those to 1 instead of 2 semitones.
118 | augment_settings_dict['pitch_decrease']['num_semitones'] = 1
119 |
120 | ##########################################################
121 | # Adjust Pitch Increase
122 | # ~~~~~~~~~~~~~~~~~~~~~
123 | augment_settings_dict['pitch_increase']['num_semitones'] = 1
124 |
125 | ##########################################################
126 | # Adjust Speed Decrease
127 | # ~~~~~~~~~~~~~~~~~~~~~
128 | augment_settings_dict['speed_decrease']['perc'] = 0.1
129 |
130 | ##########################################################
131 | # Adjust Speed Increase
132 | # ~~~~~~~~~~~~~~~~~~~~~
133 | augment_settings_dict['speed_increase']['perc'] = 0.1
134 |
135 |
136 | ######################################################
137 | # Update an Augmentation Dictionary
138 | # ---------------------------------
139 | # We'll include in the dictionary the settings we want for augmentations:
140 | augmentation_all.update(
141 | dict(augment_settings_dict = augment_settings_dict))
142 |
143 |
144 | ######################################################
145 | # Train the Model
146 | # ===============
147 | # Note: disregard the warning:
148 | # WARNING: Only the power spectrum of the VTLP augmented signal can be returned due to resizing the augmentation from (56, 4401) to (79, 276)
149 | #
150 | # This is due to the hyper frequency resolution applied to the audio during
151 | # vocal-tract length perturbation, and then deresolution to bring to correct size.
152 | # The current implementation applies the deresolution to the power spectrum rather than
153 | # directly to the STFT.
154 | model_dir, history = spdl.envclassifier_extract_train(
155 | model_name = 'augment_builtin_speechcommands',
156 | audiodata_path = data_dir,
157 | augment_dict = augmentation_all,
158 | labeled_data = True,
159 | batch_size = 1,
160 | epochs = 50,
161 | patience = 5,
162 | visualize = True,
163 | vis_every_n_items = 1,
164 | **get_feats_kwargs)
165 |
166 | #############################################################
167 | # Let's plot how the model performed (on this small dataset)
168 | plt.clf()
169 | plt.plot(history.history['accuracy'])
170 | plt.plot(history.history['val_accuracy'])
171 | plt.title('model accuracy')
172 | plt.ylabel('accuracy')
173 | plt.xlabel('epoch')
174 | plt.legend(['train', 'val'], loc='upper right')
175 | plt.savefig('accuracy.png')
176 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_featureprep_denoiser.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =======================================================
4 | Feature Extraction for Denoising: Clean and Noisy Audio
5 | =======================================================
6 |
7 | Extract acoustic features from clean and noisy datasets for
8 | training a denoising model, e.g. a denoising autoencoder.
9 |
10 | To see how soundpy implements this, see `soundpy.builtin.denoiser_feats`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 |
17 | #####################################################################
18 | import os, sys
19 | import inspect
20 | currentdir = os.path.dirname(os.path.abspath(
21 | inspect.getfile(inspect.currentframe())))
22 | parentdir = os.path.dirname(currentdir)
23 | parparentdir = os.path.dirname(parentdir)
24 | packagedir = os.path.dirname(parparentdir)
25 | sys.path.insert(0, packagedir)
26 |
27 | import soundpy as sp
28 | import IPython.display as ipd
29 | package_dir = '../../../'
30 | os.chdir(package_dir)
31 | sp_dir = package_dir
32 |
33 | ######################################################
34 | # Prepare for Extraction: Data Organization
35 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
36 |
37 | ######################################################
38 | # I will use a mini denoising dataset as an example
39 |
40 | # Example noisy data:
41 | data_noisy_dir = '{}../mini-audio-datasets/denoise/noisy'.format(sp_dir)
42 | # Example clean data:
43 | data_clean_dir = '{}../mini-audio-datasets/denoise/clean'.format(sp_dir)
44 | # Where to save extracted features:
45 | data_features_dir = './audiodata/example_feats_models/denoiser/'
46 |
47 | ######################################################
48 | # Choose Feature Type
49 | # ~~~~~~~~~~~~~~~~~~~
50 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'.
51 | # If you are working with speech, I suggest 'fbank', 'powspec', or 'stft'.
52 |
53 | feature_type = 'stft'
54 | sr = 22050
55 |
56 | ######################################################
57 | # Set Duration of Audio
58 | # ~~~~~~~~~~~~~~~~~~~~~
59 | # How many seconds of audio to use from each audio file.
60 | # The speech samples are about 3 seconds long.
61 | dur_sec = 3
62 |
63 | #######################################################################
64 | # Option 1: Built-In Functionality: soundpy does everything for you
65 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
66 |
67 | ############################################################
68 | # Define which data to use and which features to extract.
69 | # NOTE: because the dataset is very small, we set
70 | # `perc_train` lower than 0.8 (otherwise an error is raised).
71 | # Everything else is based on defaults. A feature folder with
72 | # the feature data will be created in the current working directory.
73 | # (Although, you can set this under the parameter `data_features_dir`)
74 | # `visualize` saves periodic images of the features extracted.
75 | # This is useful if you want to know what's going on during the process.
76 | perc_train = 0.6 # with larger datasets this would be around 0.8
77 | extraction_dir = sp.denoiser_feats(
78 | data_clean_dir = data_clean_dir,
79 | data_noisy_dir = data_noisy_dir,
80 | sr = sr,
81 | feature_type = feature_type,
82 | dur_sec = dur_sec,
83 | perc_train = perc_train,
84 | visualize=True);
85 | extraction_dir
86 |
87 | ################################################################
88 | # The extracted features, extraction settings applied, and
89 | # which audio files were assigned to which datasets
90 | # will be saved in the `extraction_dir` directory
91 |
92 |
93 | ############################################################
94 | # Logged Information
95 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
96 | # Let's have a look at the files in the extraction_dir. The files ending
97 | # with .npy extension contain the feature data; the .csv files contain
98 | # logged information.
99 | featfiles = list(extraction_dir.glob('*.*'))
100 | for f in featfiles:
101 | print(f.name)
102 |
103 | ############################################################
104 | # Feature Settings
105 | # ~~~~~~~~~~~~~~~~~~
106 | # Since much was done behind the scenes, it's nice to know how the features
107 | # were extracted, for example, the sample rate and number of frequency bins applied, etc.
108 | feat_settings = sp.utils.load_dict(
109 | extraction_dir.joinpath('log_extraction_settings.csv'))
110 | for key, value in feat_settings.items():
111 | print(key, ' ---> ', value)
112 |
113 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_featureprep_envclassifier.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =====================================
4 | Feature Extraction for Classification
5 | =====================================
6 |
7 | Extract acoustic features from labeled data for
8 | training an environment or speech classifier.
9 |
10 | To see how soundpy implements this, see `soundpy.builtin.envclassifier_feats`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 |
17 |
18 | #####################################################################
19 | import os, sys
20 | import inspect
21 | currentdir = os.path.dirname(os.path.abspath(
22 | inspect.getfile(inspect.currentframe())))
23 | parentdir = os.path.dirname(currentdir)
24 | parparentdir = os.path.dirname(parentdir)
25 | packagedir = os.path.dirname(parparentdir)
26 | sys.path.insert(0, packagedir)
27 |
28 | import soundpy as sp
29 | import IPython.display as ipd
30 | package_dir = '../../../'
31 | os.chdir(package_dir)
32 | sp_dir = package_dir
33 |
34 | ######################################################
35 | # Prepare for Extraction: Data Organization
36 | # -----------------------------------------
37 |
38 | ######################################################
39 | # I will use a sample speech commands data set:
40 |
41 | ##########################################################
42 | # Designate path relevant for accessing audiodata
43 | data_dir = '{}../mini-audio-datasets/speech_commands/'.format(sp_dir)
44 |
45 | ######################################################
46 | # Choose Feature Type
47 | # ~~~~~~~~~~~~~~~~~~~
48 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'.
49 | # If you are working with speech, I suggest 'fbank', 'powspec', or 'stft'.
50 |
51 | feature_type = 'fbank'
52 |
53 | ######################################################
54 | # Set Duration of Audio
55 | # ~~~~~~~~~~~~~~~~~~~~~
56 | # How many seconds of audio to use from each audio file.
57 | # The example noise and speech files are only 1 second long
58 | dur_sec = 1
59 |
60 |
61 | #############################################################
62 | # Built-In Functionality - soundpy extracts the features for you
63 | # ---------------------------------------------------------------
64 |
65 | ############################################################
66 | # Define which data to use and which features to extract.
67 | # Everything else is based on defaults. A feature folder with
68 | # the feature data will be created in the current working directory.
69 | # (Although, you can set this under the parameter `data_features_dir`)
70 | # `visualize` saves periodic images of the features extracted.
71 | # This is useful if you want to know what's going on during the process.
72 | extraction_dir = sp.envclassifier_feats(data_dir,
73 | feature_type=feature_type,
74 | dur_sec=dur_sec,
75 | visualize=True);
76 |
77 | ################################################################
78 | # The extracted features, extraction settings applied, and
79 | # which audio files were assigned to which datasets
80 | # will be saved in the following directory:
81 | extraction_dir
82 |
83 | ############################################################
84 | # Logged Information
85 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
86 | # Let's have a look at the files in the extraction_dir. The files ending
87 | # with .npy extension contain the feature data; the .csv files contain
88 | # logged information.
89 | featfiles = list(extraction_dir.glob('*.*'))
90 | for f in featfiles:
91 | print(f.name)
92 |
93 | ############################################################
94 | # Feature Settings
95 | # ~~~~~~~~~~~~~~~~~~
96 | # Since much was done behind the scenes, it's nice to know how the features
97 | # were extracted, for example, the sample rate and number of frequency bins applied, etc.
98 | feat_settings = sp.utils.load_dict(
99 | extraction_dir.joinpath('log_extraction_settings.csv'))
100 | for key, value in feat_settings.items():
101 | print(key, ' ---> ', value)
102 |
103 |
104 | ############################################################
105 | # Labeled Data
106 | # ~~~~~~~~~~~~~~~~~~
107 | # These are the labels and their encoded values:
108 | encode_dict = sp.utils.load_dict(
109 | extraction_dir.joinpath('dict_encode.csv'))
110 | for key, value in encode_dict.items():
111 | print(key, ' ---> ', value)
112 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_filter_out_noise.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 | """
4 | ===========================
5 | Filter Out Background Noise
6 | ===========================
7 |
8 | Filter out background noise from noisy speech signals.
9 |
10 | To see how soundpy implements this, see `soundpy.builtin.filtersignal`.
11 |
12 | As a general note for filtering, the Wiener Filter is the default filter for soundpy. It seems to filter signals more consistently than the Band Spectral Subtraction Filter.
13 | """
14 |
15 |
16 | ###############################################################################################
17 | #
18 |
19 |
20 | #####################################################################
21 |
22 | # Let's import soundpy, and ipd for playing audio data
23 | import soundpy as sp
24 | import IPython.display as ipd
25 |
26 |
27 | ######################################################
28 | # Define the noisy and clean speech audio files.
29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
30 | # Note: these files are available in the soundpy repo.
31 | # Designate path relevant for accessing audiodata
32 | sp_dir = '../../../'
33 |
34 | ##########################################################
35 | # Noise sample:
36 | noise = '{}audiodata/background_samples/traffic.wav'.format(sp_dir)
37 | noise = sp.string2pathlib(noise)
38 | speech = '{}audiodata/python.wav'.format(sp_dir)
39 | speech = sp.utils.string2pathlib(speech)
40 |
41 | ##########################################################
42 | # For filtering, we will set the sample rate to be quite high:
43 | sr = 48000
44 |
45 | ##########################################################
46 | # Create a noisy speech signal at an SNR of 10
47 | noisy, snr_measured = sp.dsp.add_backgroundsound(
48 | speech,
49 | noise,
50 | sr = sr,
51 | snr = 10,
52 | total_len_sec = 2,
53 | pad_mainsound_sec = 0.5)
54 |
55 | ##########################################################
56 | # Hear and see the noisy speech
57 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
58 |
59 | ipd.Audio(noisy,rate=sr)
60 |
61 | ##########################################################
62 | sp.plotsound(noisy, sr=sr, feature_type='signal',
63 | title = 'Noisy Speech', subprocess=True)
64 |
65 |
66 | ##########################################################
67 | # Hear and see the clean speech
68 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
69 | s, sr = sp.loadsound(speech, sr=sr)
70 | ipd.Audio(s,rate=sr)
71 |
72 | ##########################################################
73 | sp.plotsound(s, sr=sr, feature_type='signal',
74 | title = 'Clean Speech', subprocess=True)
75 |
76 |
77 | ##########################################################
78 | # Filter the noisy speech
79 | # ^^^^^^^^^^^^^^^^^^^^^^^
80 |
81 | ##########################################################
82 | # Wiener Filter
83 | # ~~~~~~~~~~~~~
84 |
85 | ##########################################################
86 | # Let's filter with a Wiener filter:
87 | noisy_wf, sr = sp.filtersignal(noisy,
88 | sr = sr,
89 | filter_type = 'wiener') # default
90 |
91 | ##########################################################
92 | ipd.Audio(noisy_wf,rate=sr)
93 |
94 | ##########################################################
95 | sp.plotsound(noisy_wf, sr = sr, feature_type = 'signal',
96 | title = 'Noisy Speech: Wiener Filter',
97 | subprocess=True)
98 |
99 | #################################################################
100 | # Wiener Filter with Postfilter
101 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
102 |
103 | ##########################################################
104 | # Let's filter with a Wiener filter and postfilter
105 | noisy_wfpf, sr = sp.filtersignal(noisy,
106 | sr = sr,
107 | filter_type = 'wiener',
108 | apply_postfilter = True)
109 |
110 | ##########################################################
111 | ipd.Audio(noisy_wfpf,rate=sr)
112 |
113 | ##########################################################
114 | sp.plotsound(noisy_wfpf, sr=sr, feature_type = 'signal',
115 | title = 'Noisy Speech: Wiener Filter with Postfilter',
116 | subprocess=True)
117 |
118 | #################################################################
119 | # Band Spectral Subtraction
120 | # ~~~~~~~~~~~~~~~~~~~~~~~~~
121 |
122 | ##########################################################
123 | # Let's filter using band spectral subtraction
124 | noisy_bs, sr = sp.filtersignal(noisy,
125 | sr = sr,
126 | filter_type = 'bandspec')
127 |
128 | ##########################################################
129 | ipd.Audio(noisy_bs,rate=sr)
130 |
131 | ##########################################################
132 | sp.plotsound(noisy_bs, sr = sr, feature_type = 'signal',
133 | title = 'Noisy Speech: Band Spectral Subtraction',
134 | subprocess=True)
135 |
136 |
137 | #################################################################
138 | # Band Spectral Subtraction with Postfilter
139 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
140 |
141 | #########################################################################
142 | # Finally, let's filter using band spectral subtraction with a postfilter
143 | noisy_bspf, sr = sp.filtersignal(noisy,
144 | sr = sr,
145 | filter_type = 'bandspec',
146 | apply_postfilter = True)
147 |
148 | ##########################################################
149 | ipd.Audio(noisy_bspf,rate=sr)
150 |
151 | ##########################################################
152 | sp.plotsound(noisy_bspf, sr = sr, feature_type = 'signal',
153 | title = 'Noisy Speech: Band Spectral Subtraction with Postfilter',
154 | subprocess=True)
155 |
156 |
157 | ##########################################################
158 | # Filter: increase the scale
159 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^
160 |
161 | ##########################################################
162 | # Let's filter with a Wiener filter:
163 | filter_scale = 5
164 | noisy_wf, sr = sp.filtersignal(noisy,
165 | sr=sr,
166 | filter_type = 'wiener',
167 | filter_scale = filter_scale)
168 |
169 | ##########################################################
170 | # Wiener Filter
171 | # ~~~~~~~~~~~~~
172 |
173 | ##########################################################
174 | ipd.Audio(noisy_wf,rate=sr)
175 |
176 | ##########################################################
177 | sp.plotsound(noisy_wf, sr = sr, feature_type = 'signal',
178 | title = 'Noisy Speech: Wiener Filter Scale {}'.format(filter_scale),
179 | subprocess=True)
180 |
181 | #################################################################
182 | # Wiener Filter with Postfilter
183 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
184 |
185 | ##########################################################
186 | # Let's filter with a Wiener filter and postfilter
187 | noisy_wfpf, sr = sp.filtersignal(noisy,
188 | sr = sr,
189 | filter_type = 'wiener',
190 | apply_postfilter = True,
191 | filter_scale = filter_scale)
192 |
193 | ##########################################################
194 | ipd.Audio(noisy_wfpf,rate = sr)
195 |
196 | ##########################################################
197 | sp.plotsound(noisy_wfpf, sr = sr, feature_type = 'signal',
198 | title = 'Noisy Speech: Wiener Filter with Postfilter Scale {}'.format(filter_scale),
199 | subprocess=True)
200 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_implement_denoiser.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =================================
4 | Implement a Denoising Autoencoder
5 | =================================
6 |
7 | Implement denoising autoencoder to denoise a noisy speech signal.
8 |
9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_run`.
10 | """
11 |
12 |
13 | ############################################################################################
14 | #
15 |
16 | #####################################################################
17 | # Let's import soundpy and other packages
18 | import soundpy as sp
19 | import numpy as np
20 | # for playing audio in this notebook:
21 | import IPython.display as ipd
22 |
23 | #####################################################################
24 | # As well as the deep learning component of soundpy
25 | from soundpy import models as spdl
26 |
27 | ######################################################
28 | # Prepare for Implementation: Data Organization
29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
30 |
31 | ##########################################################
32 | # Set path relevant for audio data for this example
33 | sp_dir = '../../../'
34 |
35 | ######################################################
36 | # Set model pathway
37 | # ~~~~~~~~~~~~~~~~~
38 | # Currently, this expects a model saved with weights, with a .h5 extension.
39 | # (See `model` below)
40 |
41 | ######################################################
42 | # The soundpy repo offers a pre-trained denoiser, which we'll use.
43 | model = '{}audiodata/models/'.format(sp_dir)+\
44 | 'denoiser/example_denoiser_stft.h5'
45 | # ensure it is a pathlib.PosixPath object
46 | print(model)
47 | model = sp.utils.string2pathlib(model)
48 | model_dir = model.parent
49 |
50 | #########################################################
51 | # What is in this folder?
52 | files = list(model_dir.glob('*.*'))
53 | for f in files:
54 | print(f.name)
55 |
56 | ######################################################
57 | # Provide dictionary with feature extraction settings
58 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 |
60 | #########################################################
61 | # If soundpy extracts features for you, a 'log_extraction_settings.csv'
62 | # file will be saved, which includes relevant feature settings for implementing
63 | # the model; see `soundpy.feats.save_features_datasets`
64 | feat_settings = sp.utils.load_dict(
65 | model_dir.joinpath('log_extraction_settings.csv'))
66 | for key, value in feat_settings.items():
67 | print(key, ' --> ', value)
68 | # convert values that were stored as strings back to their original types
69 | import ast
70 | try:
71 | feat_settings[key] = ast.literal_eval(value)
72 | except ValueError:
73 | pass
74 | except SyntaxError:
75 | pass
76 |
77 | #########################################################
78 | # For the purposes of plotting, let's use some of the settings defined:
79 | feature_type = feat_settings['feature_type']
80 | sr = feat_settings['sr']
81 |
82 | ######################################################
83 | # Provide new audio for the denoiser to denoise!
84 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
85 |
86 | #########################################################
87 | # We'll use sample speech from the soundpy repo:
88 | speech = sp.string2pathlib('{}audiodata/python.wav'.format(sp_dir))
89 | s, sr = sp.loadsound(speech, sr=sr)
90 |
91 | #########################################################
92 | # Let's add some white noise (10 SNR)
93 | s_n = sp.augment.add_white_noise(s, sr=sr, snr=10)
94 |
95 | ##############################################################
96 | # What does the noisy audio sound like?
97 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
98 | ipd.Audio(s_n,rate=sr)
99 |
100 | ##############################################################
101 | # What does the noisy audio look like?
102 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
103 | sp.plotsound(s_n, sr = sr, feature_type='signal', subprocess=True)
104 |
105 | ##############################################################
106 | # What does the clean audio sound like?
107 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
108 | ipd.Audio(s,rate=sr)
109 |
110 | ##############################################################
111 | # What does the clean audio look like?
112 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
113 | sp.plotsound(s, sr = sr, feature_type='signal', subprocess=True)
114 |
115 | #########################################################################
116 | # Built-In Denoiser Functionality
117 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
118 |
119 | ##############################################################
120 | # We just need to feed the model path, the noisy sample path, and
121 | # the feature settings dictionary we looked at above.
122 | y, sr = spdl.denoiser_run(model, s_n, feat_settings)
123 |
124 | ##########################################################
125 | # How does the output sound?
126 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~
127 | ipd.Audio(y,rate=sr)
128 |
129 | ##########################################################
130 | # How does is the output look?
131 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
132 | sp.plotsound(y, sr=sr, feature_type = feature_type, subprocess=True)
133 |
134 | ##########################################################
135 | # How do the features compare?
136 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
137 |
138 | ##########################################################
139 | # STFT features of the noisy input speech:
140 | sp.plotsound(s_n, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
141 | title = 'Noisy input: STFT features', subprocess=True)
142 |
143 | ##########################################################
144 | # STFT features of the output
145 | sp.plotsound(y, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
146 | title = 'Denoiser Output: STFT features', subprocess=True)
147 |
148 | ##########################################################
149 | # STFT features of the clean version of the audio:
150 | sp.plotsound(s, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
151 | title = 'Clean "target" audio: STFT features', subprocess=True)
152 |
153 |
154 | ##########################################################
155 | # It's not perfect but for a pretty simple implementation, the noise is gone
156 | # and you can hear the person speaking. Pretty cool!
157 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_signals_and_features.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =======================
4 | Create and Plot Signals
5 | =======================
6 |
7 | Create and plot signals / noise; combine them at a specific SNR.
8 |
9 | To see how soundpy implements this, see `soundpy.dsp.generate_sound`,
10 | `soundpy.dsp.generate_noise` and `soundpy.dsp.add_backgroundsound`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 |
17 | #####################################################################
18 | # Let's import soundpy
19 | import soundpy as sp
20 |
21 | ###########################################################################
22 | # Create a Signal
23 | # ^^^^^^^^^^^^^^^
24 |
25 | ########################################################################
26 | # First let's set what sample rate we want to use
27 | sr = 44100
28 |
29 |
30 | #########################################################################
31 | # Let's create a signal of 10 Hz
32 | sig1_hz = 10
33 | sig1, sr = sp.generate_sound(freq=sig1_hz, amplitude = 0.4, sr=sr, dur_sec=1)
34 | sp.plotsound(sig1, sr=sr, feature_type = 'signal',
35 | title = 'Signal: {} Hz'.format(sig1_hz), subprocess=True)
36 |
37 |
38 | #########################################################################
39 | # Let's create a signal of 20 Hz
40 | sig2_hz = 20
41 | sig2, sr = sp.generate_sound(freq=sig2_hz, amplitude= 0.4, sr=sr, dur_sec=1)
42 | sp.plotsound(sig2, sr=sr, feature_type = 'signal',
43 | title = 'Signal: {} Hz'.format(sig2_hz), subprocess=True)
44 |
45 | ###########################################################################
46 | # Combine Signals
47 | # ^^^^^^^^^^^^^^^
48 |
49 |
50 | #########################################################################
51 | # Add them together and see what they look like:
52 | sig3 = sig1 + sig2
53 | sp.plotsound(sig3, sr=sr, feature_type = 'signal',
54 | title='Mixed Signals: {} Hz + {} Hz'.format(sig1_hz, sig2_hz),
55 | subprocess=True)
56 |
57 |
58 | ##########################################################################
59 | # Generate Pseudo-Random Noise
60 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
61 |
62 |
63 | #########################################################################
64 | # Create noise to add to the signal:
65 | noise = sp.generate_noise(len(sig3), amplitude=0.02, random_seed=40)
66 | sp.plotsound(noise, sr=sr, feature_type = 'signal',
67 | title='Random Noise', subprocess=True)
68 |
69 | ###########################################################################
70 | # Control SNR: Adding a Background Sound
71 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
72 |
73 | #########################################################################
74 | # Add noise at signal-to-noise ratio of 40
75 | sig_noisy, snr = sp.dsp.add_backgroundsound(
76 | audio_main = sig3,
77 | audio_background = noise,
78 | sr = sr,
79 | snr = 40,
80 | clip_at_zero = False)
81 |
82 | # keep energy between 1 and -1
83 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
84 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 40 SNR',
85 | subprocess=True)
86 |
87 | #########################################################################
88 | # Add noise at signal-to-noise ratio of 20
89 | sig_noisy, snr = sp.dsp.add_backgroundsound(
90 | audio_main = sig3,
91 | audio_background = noise,
92 | sr = sr,
93 | snr = 20)
94 | # keep energy between 1 and -1
95 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
96 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 20 SNR',
97 | subprocess=True)
98 |
99 | #########################################################################
100 | # Add noise at signal-to-noise ratio of 10
101 | sig_noisy, snr = sp.dsp.add_backgroundsound(
102 | audio_main = sig3,
103 | audio_background = noise,
104 | sr = sr,
105 | snr = 10)
106 | # keep energy between 1 and -1
107 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
108 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 10 SNR',
109 | subprocess=True)
110 |
111 | #########################################################################
112 | # Add noise at signal-to-noise ratio of 0
113 | sig_noisy, snr = sp.dsp.add_backgroundsound(
114 | audio_main = sig3,
115 | audio_background = noise,
116 | sr = sr,
117 | snr = 0)
118 | # keep energy between 1 and -1
119 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
120 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 0 SNR',
121 | subprocess=True)
122 |
123 |
124 | #########################################################################
125 | # Add noise at signal-to-noise ratio of -10
126 | sig_noisy, snr = sp.dsp.add_backgroundsound(
127 | audio_main = sig3,
128 | audio_background = noise,
129 | sr = sr,
130 | snr = -10)
131 | # keep energy between 1 and -1
132 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
133 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: -10 SNR',
134 | subprocess=True)
135 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_train_classifier.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | ============================
4 | Train an Acoustic Classifier
5 | ============================
6 |
7 | Train an acoustic classifier on speech or noise features.
8 |
9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_train`.
10 | """
11 |
12 | ###############################################################################################
13 | #
14 | import os, sys
15 | import inspect
16 | currentdir = os.path.dirname(os.path.abspath(
17 | inspect.getfile(inspect.currentframe())))
18 | parentdir = os.path.dirname(currentdir)
19 | parparentdir = os.path.dirname(parentdir)
20 | packagedir = os.path.dirname(parparentdir)
21 | sys.path.insert(0, packagedir)
22 |
23 | import matplotlib.pyplot as plt
24 | import IPython.display as ipd
25 | package_dir = '../../../'
26 | os.chdir(package_dir)
27 | sp_dir = package_dir
28 |
29 |
30 | #####################################################################
31 | # Let's import soundpy for handling sound
32 | import soundpy as sp
33 | #####################################################################
34 | # As well as the deep learning component of soundpy
35 | from soundpy import models as spdl
36 |
37 |
38 | ######################################################
39 | # Prepare for Training: Data Organization
40 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
41 |
42 | ##########################################################
43 | # Set path relevant for audio data for this example
44 |
45 | ######################################################
46 | # I will load previously extracted features (from the Speech Commands Dataset)
47 | # See `soundpy.feats.save_features_datasets` or `soundpy.builtin.envclassifier_feats`
48 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\
49 | 'envclassifier/example_feats_fbank/'
50 |
51 | #########################################################
52 | # What is in this folder?
53 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)
54 | files = list(feature_extraction_dir.glob('*.*'))
55 | for f in files:
56 | print(f.name)
57 |
58 | #########################################################
59 | # The .npy files contain the features themselves, in train, validation, and
60 | # test datasets:
61 | files = list(feature_extraction_dir.glob('*.npy'))
62 | for f in files:
63 | print(f.name)
64 |
65 | #########################################################
66 | # The .csv files contain information about how the features were extracted
67 | files = list(feature_extraction_dir.glob('*.csv'))
68 | for f in files:
69 | print(f.name)
70 |
71 | #########################################################
72 | # We'll have a look at which features were extracted and other settings:
73 | feat_settings = sp.utils.load_dict(
74 | feature_extraction_dir.joinpath('log_extraction_settings.csv'))
75 | for key, value in feat_settings.items():
76 | print(key, ' --> ', value)
77 |
78 | #########################################################
79 | # For more about these settings, see `soundpy.feats.save_features_datasets`.
80 |
81 | #########################################################
82 | # We'll have a look at the audio files that were assigned
83 | # to the train, val, and test datasets.
84 | audio_datasets = sp.utils.load_dict(
85 | feature_extraction_dir.joinpath('dataset_audiofiles.csv'))
86 | count = 0
87 | for key, value in audio_datasets.items():
88 | print(key, ' --> ', value)
89 | count += 1
90 | if count > 5:
91 | break
92 |
93 | #############################################################
94 | # Built-In Functionality: soundpy does everything for you
95 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
96 | # For more about this function, see `soundpy.models.builtin.envclassifier_train`.
97 |
98 | #############################################################
99 | model_dir, history = spdl.envclassifier_train(
100 | feature_extraction_dir = feature_extraction_dir,
101 | epochs = 10,
102 | patience = 5)
103 |
104 | #############################################################
105 | # Where the model and logs are located:
106 | model_dir
107 |
108 | #############################################################
109 | # Let's plot how the model performed (on this mini dataset)
110 | import matplotlib.pyplot as plt
111 | plt.clf()
112 | plt.plot(history.history['accuracy'])
113 | plt.plot(history.history['val_accuracy'])
114 | plt.title('model accuracy')
115 | plt.ylabel('accuracy')
116 | plt.xlabel('epoch')
117 | plt.legend(['train', 'val'], loc='upper right')
118 | plt.savefig('accuracy.png')
119 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/examples/plot_train_denoiser.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =============================
4 | Train a Denoising Autoencoder
5 | =============================
6 |
7 | Train a denoising autoencoder with clean and noisy acoustic features.
8 |
9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_train`,
10 | `soundpy.builtin.denoiser_feats` and `soundpy.builtin.create_denoise_data`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 | import os, sys
17 | import inspect
18 | currentdir = os.path.dirname(os.path.abspath(
19 | inspect.getfile(inspect.currentframe())))
20 | parentdir = os.path.dirname(currentdir)
21 | parparentdir = os.path.dirname(parentdir)
22 | packagedir = os.path.dirname(parparentdir)
23 | sys.path.insert(0, packagedir)
24 |
25 | import matplotlib.pyplot as plt
26 | import IPython.display as ipd
27 | package_dir = '../../../'
28 | os.chdir(package_dir)
29 | sp_dir = package_dir
30 |
31 |
32 | #####################################################################
33 | # Let's import soundpy for handling sound
34 | import soundpy as sp
35 | #####################################################################
36 | # As well as the deep learning component of soundpy
37 | from soundpy import models as spdl
38 |
39 |
40 | ######################################################
41 | # Prepare for Training: Data Organization
42 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
43 |
44 | ##########################################################
45 | # Designate path relevant for accessing audiodata
46 |
47 |
48 | ######################################################
49 | # I will load previously extracted features (sample data), see `soundpy.feats.save_features_datasets` or `soundpy.builtin.denoiser_feats`
50 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\
51 | 'denoiser/example_feats_fbank/'
52 |
53 | #########################################################
54 | # What is in this folder?
55 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)
56 | files = list(feature_extraction_dir.glob('*.*'))
57 | for f in files:
58 | print(f.name)
59 |
60 | #########################################################
61 | # The .npy files contain the features themselves, in train, validation, and
62 | # test datasets:
63 | files = list(feature_extraction_dir.glob('*.npy'))
64 | for f in files:
65 | print(f.name)
66 |
67 | #########################################################
68 | # The .csv files contain information about how the features were extracted
69 | files = list(feature_extraction_dir.glob('*.csv'))
70 | for f in files:
71 | print(f.name)
72 |
73 | #########################################################
74 | # We'll have a look at which features were extracted and other settings:
75 | feat_settings = sp.utils.load_dict(
76 | feature_extraction_dir.joinpath('log_extraction_settings.csv'))
77 | for key, value in feat_settings.items():
78 | print(key, ' --> ', value)
79 |
80 | #########################################################
81 | # For more about these settings, see `soundpy.feats.save_features_datasets`.
82 |
83 | #########################################################
84 | # We'll have a look at the audio files that were assigned
85 | # to the train, val, and test datasets.
86 | audio_datasets = sp.utils.load_dict(
87 | feature_extraction_dir.joinpath('audiofiles_datasets_clean.csv'))
88 | count = 0
89 | for key, value in audio_datasets.items():
90 | print(key, ' --> ', value)
91 | count += 1
92 | if count > 5:
93 | break
94 |
95 | #############################################################
96 | # Built-In Functionality: soundpy does everything for you
97 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
98 | # For more about this, see `soundpy.models.builtin.denoiser_train`.
99 |
100 | #############################################################
101 | model_dir, history = spdl.denoiser_train(
102 | feature_extraction_dir = feature_extraction_dir,
103 | epochs = 10)
104 |
105 | #########################################################
106 |
107 |
108 | #############################################################
109 | # Where the model and logs are located:
110 | model_dir
111 |
112 |
113 | #############################################################
114 | # Let's plot how the model performed (on this mini dataset)
115 | import matplotlib.pyplot as plt
116 | plt.plot(history.history['loss'])
117 | plt.plot(history.history['val_loss'])
118 | plt.title('model loss')
119 | plt.ylabel('loss')
120 | plt.xlabel('epoch')
121 | plt.legend(['train', 'val'], loc='upper right')
122 | plt.savefig('loss.png')
123 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/exceptions.rst:
--------------------------------------------------------------------------------
1 |
2 | Customized Errors
3 | -----------------
4 |
5 | .. automodule:: soundpy.exceptions
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/feats.rst:
--------------------------------------------------------------------------------
1 |
2 | Extract and manipulate audio features
3 | -------------------------------------
4 |
5 | .. automodule:: soundpy.feats
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/files.rst:
--------------------------------------------------------------------------------
1 |
2 | Working with audio files
3 | ------------------------
4 |
5 | .. automodule:: soundpy.files
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/filters.rst:
--------------------------------------------------------------------------------
1 |
2 | Filters: Wiener and Band Spectral Subtraction
3 | ---------------------------------------------
4 |
5 | .. automodule:: soundpy.filters
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
10 | .. autoclass:: soundpy.filters.FilterSettings
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | .. automethod:: __init__
16 |
17 | .. autoclass:: soundpy.filters.Filter
18 | :members:
19 | :undoc-members:
20 | :show-inheritance:
21 |
22 | .. automethod:: __init__
23 |
24 |
25 | .. autoclass:: soundpy.filters.WienerFilter
26 | :members:
27 | :undoc-members:
28 | :show-inheritance:
29 |
30 | .. automethod:: __init__
31 |
32 |
33 | .. autoclass:: soundpy.filters.BandSubtraction
34 | :members:
35 | :undoc-members:
36 | :show-inheritance:
37 |
38 | .. automethod:: __init__
39 |
40 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/index.rst:
--------------------------------------------------------------------------------
1 | .. SoundPy documentation master file, created by
2 | sphinx-quickstart on Mon Jun 15 11:57:18 2020.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | SoundPy v0.1.0a3
7 | ================
8 |
9 | Welcome to the docs!
10 | --------------------
11 |
12 | To access documentation for specific versions:
13 |
14 | .. toctree::
15 | :maxdepth: 1
16 |
17 | versions.rst
18 |
19 |
20 | About SoundPy
21 | -------------
22 |
23 | SoundPy is a research-based Python package_ for exploring and experimenting with sound and deep learning. NOTE: SoundPy is in the alpha stage of development; please forgive any bugs that pop up, and feel free (and encouraged) to open an issue_.
24 |
25 | Those who might find this useful:
26 |
27 | * speech and sound enthusiasts
28 | * digital signal processing / mathematics / physics / acoustics enthusiasts
29 | * deep learning enthusiasts
30 | * researchers
31 | * linguists
32 | * psycholinguists
33 |
34 | The main goal of SoundPy is to provide the code and functionality with more context via visualization, research, and mathematics. Most of the resources used to build the functionality stem from publicly available research and datasets.
35 |
36 | As it covers quite a large range, from audio file conversion to implementation of trained neural networks, the purpose of SoundPy is not to be the perfect implementation of all functions (although that is also a goal :P ), but rather a peek into how they *can* be implemented, hopefully offering people a foundation for trying out different ways of implementation (feature extraction, building neural networks, etc.).
37 |
38 | This project is still in the beginning stages and has a lot of room for growth, especially with contributors having a background / knowledge in data science, computer science, machine and deep learning, physics, acoustics, or dsp. Contributors from other backgrounds are also welcome! If you'd like SoundPy to do something it doesn't, try making it or create an issue.
39 |
40 | .. _PyPI: https://pypi.org/project/soundpy/
41 |
42 | .. _package: https://github.com/a-n-rose/Python-Sound-Tool/tree/development
43 |
44 | .. _issue: https://github.com/a-n-rose/Python-Sound-Tool/issues
45 |
46 | .. toctree::
47 | :maxdepth: 2
48 |
49 | example_cases.rst
50 | readme.rst
51 |
52 |
53 | .. toctree::
54 | :maxdepth: 1
55 |
56 | changelog.rst
57 |
58 | * :ref:`genindex`
59 | * :ref:`modindex`
60 | * :ref:`search`
61 |
62 | :Author:
63 | Aislyn Rose
64 |
65 | rose.aislyn.noelle@gmail.com
66 |
67 | webpage_
68 |
69 | github_
70 |
71 | .. _webpage: https://a-n-rose.github.io/
72 |
73 | .. _github : https://github.com/a-n-rose
74 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/model_dataprep.rst:
--------------------------------------------------------------------------------
1 |
2 | Feeding large datasets to models
3 | --------------------------------
4 |
5 | .. autoclass:: soundpy.models.dataprep.Generator
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
10 | .. automethod:: __init__
11 |
12 |
13 | .. automodule:: soundpy.models.dataprep
14 | :members:
15 | :undoc-members:
16 | :show-inheritance:
17 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/modelsetup.rst:
--------------------------------------------------------------------------------
1 |
2 | Additional model setup (e.g. Early Stopping)
3 | --------------------------------------------
4 |
5 | .. automodule:: soundpy.models.modelsetup
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/modules.rst:
--------------------------------------------------------------------------------
1 | ==============================
2 | SoundPy Functionality v0.1.0a3
3 | ==============================
4 |
5 | .. include:: builtin_sp.rst
6 |
7 | .. include:: builtin_spdl.rst
8 |
9 | .. include:: augment.rst
10 |
11 | .. include:: files.rst
12 |
13 | .. include:: datasets.rst
14 |
15 | .. include:: dsp.rst
16 |
17 | .. include:: filters.rst
18 |
19 | .. include:: feats.rst
20 |
21 | .. include:: template_models.rst
22 |
23 | .. include:: modelsetup.rst
24 |
25 | .. include:: model_dataprep.rst
26 |
27 | .. include:: utils.rst
28 |
29 | .. include:: exceptions.rst
30 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/readme.rst:
--------------------------------------------------------------------------------
1 | .. include:: modules.rst
2 |
3 |
4 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/template_models.rst:
--------------------------------------------------------------------------------
1 | Template deep neural networks
2 | -----------------------------
3 |
4 | .. automodule:: soundpy.models.template_models
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/utils.rst:
--------------------------------------------------------------------------------
1 |
2 | Other useful non-specific functionality
3 | ---------------------------------------
4 |
5 | .. automodule:: soundpy.utils
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/0.1.0a3/versions.rst:
--------------------------------------------------------------------------------
1 | ******************************************
2 | SoundPy Versions Available as PyPI Package
3 | ******************************************
4 |
5 | .. toctree::
6 | :maxdepth: 1
7 |
8 | 0.1.0a2/index.rst
9 |
10 |
--------------------------------------------------------------------------------
/docs/source/augment.rst:
--------------------------------------------------------------------------------
1 |
2 | Augment audio data
3 | ------------------
4 |
5 | .. automodule:: soundpy.augment
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/builtin_sp.rst:
--------------------------------------------------------------------------------
1 |
2 | Built-In Functionality (non Deep Learning)
3 | ------------------------------------------
4 |
5 | .. automodule:: soundpy.builtin
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/builtin_spdl.rst:
--------------------------------------------------------------------------------
1 |
2 | Built-In Functionality (Deep Learning)
3 | --------------------------------------
4 |
5 | .. automodule:: soundpy.models.builtin
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/changelog.rst:
--------------------------------------------------------------------------------
1 | *********
2 | Changelog
3 | *********
4 |
5 | v0.1.0a
6 | =======
7 |
8 |
9 | v0.1.0a3
10 | --------
11 | 2021-04-09
12 |
13 | Bug fixes
14 | - no longer use Librosa for feature extraction: allow easier implementation of augmentations, especially during training.
15 | - `soundpy.feats.plot` now uses parameter `subprocess` to allow for different backends to be applied, depending on when the function is called. For example, if plotting from within a Generator while training, `subprocess` should be set to True, and the 'Agg' backend will be applied. Otherwise, the 'TkAgg' backend is used. Fixes issues with multi-threading.
16 | - Fixed generator and Tensorflow issue: with Tensorflow 2.2.0+ the models in `soundpy.models.builtin` that were trained via generator failed. Use `tensorflow.data.Dataset.from_generator` to feed generator data to models.
17 | - Improved `clip_at_zero`.
18 |
19 | Features
20 | - Python 3.8 can now be used.
21 | - throw deprecation warning for parameters `context_window` or `frames_per_sample` as these "features" will be removed from feature extraction. Rather, the features can be reshaped post feature extraction.
22 | - added `timestep`, `axis_timestep`, `context_window`, `axis_context_window` and `combine_axes_0_1` parameters to `soundpy.models.Generator`: allow more control over shape of the features.
23 | - can run `soundpy.models.builtin.envclassifier_extract_train` to run with pre-extracted val and test features.
24 | - `soundpy.feats.plotsound`, `soundpy.feats.plot_vad` and `soundpy.feats.plot_dom_freq` all can plot stereo sound: for each channel in a stereo signal, a plot is either generated or saved. If a filename already exists, a date stamp is added to filename to avoid overwriting images.
25 | - allow `grayscale2color` to be applied to 2D data.
26 |
27 | Breaking changes
28 | - `soundpy.models.Generator` uses parameter `normalize` instead of `normalized`. Found this to be more intuitive. If `normalize` is set to True, data will be normalized. Before, if `normalized` was set to True, data would not be normalized.
29 | - removed `add_tensor_last` and `add_tensor_first`: require adding of tensors (for keras) to be included in parameter `desired_input_shape`.
30 |
31 | Other changes
32 | - CPU soundpy can use Tensorflow 2.1.0, 2.2.0 and 2.3.0. Dockerfile still uses Tensorflow 2.1.0 as it is still compatible with updated code.
33 | - `soundpy.models.builtin.implement_denoiser` raises warning if cleaned features cannot be converted to raw audio samples.
34 |
35 |
36 | v0.1.0a2
37 | --------
38 | 2020-08-13
39 |
40 |
41 | Bug fixes
42 | - added `use_beg_ms` parameter in `soundpy.dsp.vad`: improved VAD recognition of silences post speech.
43 |
44 | Features
45 | - added GPU option: provide instructions and Docker image for running SoundPy with GPU
46 | - added `extend_window_ms` parameter to `soundpy.feats.get_vad_samples` and `soundpy.feats.get_vad_stft`: can extend VAD window if desired. Useful in higher SNR environments.
47 | - added `soundpy.feats.get_samples_clipped` and `soundpy.feats.get_stft_clipped` to clip off beginning and ending silences.
48 | - added `beg_end_clipped` parameter to `soundpy.feats.plot_vad` to visualize VAD by clipping the beginning and ending silences (if True) or VAD instances throughout the signal (if False).
49 | - added `soundpy.models.dataprep.GeneratorFeatExtraction` class for extracting and augmenting features during training (still experimental).
50 | - added `soundpy.models.builtin.envclassifier_extract_train` as an example of extracting and augmenting features during training (still experimental).
51 | - added `soundpy.dsp.clip_at_zero` to enable smoother concatenations of signals and enables removal of clicks at beginning and ending of signals.
52 | - added `soundpy.dsp.remove_dc_bias` to enable smoother concatenations of signals
53 | - added and set `remove_dc` parameter to True in `soundpy.files.loadsound` and `soundpy.files.savesound` to ensure signals all have mean zero.
54 | - added `mirror_sound` option to `soundpy.dsp.apply_sample_length` as a way to extend sound.
55 | - added `soundpy.dsp.ismono` to check if samples were mono or stereo.
56 | - added `soundpy.dsp.average_channels` to average sample amplitudes across channels, e.g. to identify where high energy begins / ends in the signal without disregarding additional channels (if stereo sound).
57 | - added `soundpy.dsp.add_channels` for adding additional channels if needed (e.g. for applying a 'hann' or 'hamming' window to stereo sound)
58 | - added stereo sound functionality to `soundpy.dsp.add_backgroundsound`, `soundpy.dsp.clip_at_zero`, `soundpy.dsp.calc_fft`, `soundpy.feats.get_stft`, `soundpy.feats.get_vad_stft`
59 |
60 |
61 | Other changes
62 | - name change: from pysoundtool to soundpy: simpler
63 | - updated dependencies to newest versions still compatible with Tensorflow 2.1.0
64 | - moved `soundpy.dsp.get_vad_samples` to `soundpy.feats.get_vad_samples`
65 | - moved `soundpy.dsp.get_vad_stft` to `soundpy.feats.get_vad_stft`
66 | - name change: allow `soundpy.feats.normalize` to be used as `soundpy.normalize`
67 | - removed `pysoundtool_online` and mybinder button as maintaining the online version was not easily done. Aim to reimplement at some point.
68 |
69 |
70 |
71 | v0.1.0a1
72 | ========
73 |
74 | Initial public alpha release.
75 |
--------------------------------------------------------------------------------
/docs/source/datasets.rst:
--------------------------------------------------------------------------------
1 |
2 | Organizing datasets
3 | -------------------
4 |
5 | .. automodule:: soundpy.datasets
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/dsp.rst:
--------------------------------------------------------------------------------
1 |
2 | Working with signals
3 | --------------------
4 |
5 | .. automodule:: soundpy.dsp
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/example_cases.rst:
--------------------------------------------------------------------------------
1 |
2 | .. toctree::
3 | :maxdepth: 2
4 |
5 | .. include:: auto_examples/index.rst
6 |
--------------------------------------------------------------------------------
/docs/source/examples/README.txt:
--------------------------------------------------------------------------------
1 |
2 | -----------------------------
3 | SoundPy Example Use Cases
4 | -----------------------------
5 |
--------------------------------------------------------------------------------
/docs/source/examples/plot_dataset_info_formatting.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | ========================================
4 | Audio Dataset Exploration and Formatting
5 | ========================================
6 |
7 | Examine audio files within a dataset, and reformat them if desired.
8 |
9 | To see how soundpy implements this, see `soundpy.builtin.dataset_logger` and
10 | `soundpy.builtin.dataset_formatter`.
11 | """
12 |
13 | #####################################################################
14 | # Let's import soundpy
15 | import soundpy as sp
16 |
17 | ###############################################################################################
18 | #
19 | # Dataset Exploration
20 | # ^^^^^^^^^^^^^^^^^^^
21 |
22 | ##########################################################
23 | # Designate path relevant for accessing audiodata
24 | sp_dir = '../../../'
25 |
26 | ##########################################################
27 | # I will explore files in a small dataset on my computer with varying file formats.
28 | dataset_path = '{}audiodata2/'.format(sp_dir)
29 | dataset_info_dict = sp.builtin.dataset_logger('{}audiodata2/'.format(sp_dir));
30 |
31 | #########################################################################
32 | # This returns our data in a dictionary, perfect for exploring via Pandas
33 | import pandas as pd
34 | all_data = pd.DataFrame(dataset_info_dict).T
35 | all_data.head()
36 |
37 | ###################################
38 | # Let's have a look at the audio files and how uniform they are:
39 | print('formats: ', all_data.format_type.unique())
40 | print('bitdepth (types): ', all_data.bitdepth.unique())
41 | print('mean duration (sec): ', all_data.dur_sec.mean())
42 | print('std dev duration (sec): ', all_data.dur_sec.std())
43 | print('min sample rate: ', all_data.sr.min())
44 | print('max sample rate: ', all_data.sr.max())
45 | print('number of channels: ', all_data.num_channels.unique())
46 |
47 |
48 | ##########################################################
49 | # For a visual example, let's plot the count of various sample rates. (48000 Hz is high definition sound, 16000 Hz is wideband, and 8000 Hz is narrowband, similar to how speech sounds on the telephone.)
50 | all_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts')
51 |
52 | ###############################################################################################
53 | # Reformat a Dataset
54 | # ^^^^^^^^^^^^^^^^^^
55 |
56 | ##############################################################
57 | # Let's say we have a dataset that we want to make consistent.
58 | # We can do that with soundpy
59 | new_dataset_dir = sp.builtin.dataset_formatter(
60 | dataset_path,
61 | recursive = True, # we want all the audio, even in nested directories
62 | format='WAV',
63 | bitdepth = 16, # if set to None, a default bitdepth will be applied
64 | sr = 16000, # wideband
65 | mono = True, # ensure data all have 1 channel
66 | dur_sec = 3, # audio will be limited to 3 seconds
67 | zeropad = True, # audio shorter than 3 seconds will be zeropadded
68 | new_dir = './example_dir/', # if None, a time-stamped directory will be created for you
69 | overwrite = False # can set to True if you want to overwrite files
70 | );
71 |
72 | ###############################################
73 | # Let's see what the audio data looks like now:
74 | dataset_formatted_dict = sp.builtin.dataset_logger(new_dataset_dir, recursive=True);
75 | formatted_data = pd.DataFrame(dataset_formatted_dict).T
76 |
77 | #####################
78 | formatted_data.head()
79 |
80 | ###################################
81 | print('audio formats: ', formatted_data.format_type.unique())
82 | print('bitdepth (types): ', formatted_data.bitdepth.unique())
83 | print('mean duration (sec): ', formatted_data.dur_sec.mean())
84 | print('std dev duration (sec): ', formatted_data.dur_sec.std())
85 | print('min sample rate: ', formatted_data.sr.min())
86 | print('max sample rate: ', formatted_data.sr.max())
87 | print('number of channels: ', formatted_data.num_channels.unique())
88 |
89 | ##########################################################
90 | # Now all the audio data is sampled at the same rate: 16000 Hz
91 | formatted_data.groupby('sr').count().plot(kind = 'bar', title = 'Sample Rate Counts')
92 |
93 | ###########################################
94 | # There we go!
95 | # You can reformat only parts of the audio files, e.g. format or bitdepth.
96 | # If you leave parameters in sp.builtin.dataset_formatter as None, the original
97 | # settings of the audio file will be maintained (except for bitdepth:
98 | # a default bitdepth will be applied according to the format of the file); see `soundfile.default_subtype`.
99 |
--------------------------------------------------------------------------------
/docs/source/examples/plot_extract_augment_train_classifier.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | ==================================================
4 | Extract, Augment, and Train an Acoustic Classifier
5 | ==================================================
6 |
7 | Extract and augment features as an acoustic classifier is trained on speech.
8 |
9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_extract_train`.
10 | """
11 |
12 | ###############################################################################################
13 | #
14 |
15 | import os, sys
16 | import inspect
17 | currentdir = os.path.dirname(os.path.abspath(
18 | inspect.getfile(inspect.currentframe())))
19 | parentdir = os.path.dirname(currentdir)
20 | parparentdir = os.path.dirname(parentdir)
21 | packagedir = os.path.dirname(parparentdir)
22 | sys.path.insert(0, packagedir)
23 |
24 | import matplotlib.pyplot as plt
25 | import IPython.display as ipd
26 | package_dir = '../../../'
27 | os.chdir(package_dir)
28 | sp_dir = package_dir
29 |
30 |
31 | #####################################################################
32 | # Let's import soundpy for handling sound
33 | import soundpy as sp
34 | #####################################################################
35 | # As well as the deep learning component of soundpy
36 | from soundpy import models as spdl
37 |
38 |
39 | ######################################################
40 | # Prepare for Training: Data Organization
41 | # =======================================
42 |
43 | ######################################################
44 | # I will use a sample speech commands data set:
45 |
46 | ##########################################################
47 | # Designate path relevant for accessing audiodata
48 | data_dir = '{}../mini-audio-datasets/speech_commands/'.format(sp_dir)
49 |
50 |
51 | ######################################################
52 | # Setup a Feature Settings Dictionary
53 | # -----------------------------------
54 |
55 |
56 | feature_type = 'fbank'
57 | num_filters = 40
58 | rate_of_change = False
59 | rate_of_acceleration = False
60 | dur_sec = 1
61 | win_size_ms = 25
62 | percent_overlap = 0.5
63 | sr = 22050
64 | fft_bins = None
65 | num_mfcc = None
66 | real_signal = True
67 |
68 | get_feats_kwargs = dict(feature_type = feature_type,
69 | sr = sr,
70 | dur_sec = dur_sec,
71 | win_size_ms = win_size_ms,
72 | percent_overlap = percent_overlap,
73 | fft_bins = fft_bins,
74 | num_filters = num_filters,
75 | num_mfcc = num_mfcc,
76 | rate_of_change = rate_of_change,
77 | rate_of_acceleration = rate_of_acceleration,
78 | real_signal = real_signal)
79 |
80 | ######################################################
81 | # Setup an Augmentation Dictionary
82 | # --------------------------------
83 | # This will apply augmentations at random at each epoch.
84 | augmentation_all = dict([('add_white_noise',True),
85 | ('speed_decrease', True),
86 | ('speed_increase', True),
87 | ('pitch_decrease', True),
88 | ('pitch_increase', True),
89 | ('harmonic_distortion', True),
90 | ('vtlp', True)
91 | ])
92 |
93 | ##########################################################
94 | # see the default values for these augmentations
95 | augment_settings_dict = {}
96 | for key in augmentation_all.keys():
97 | augment_settings_dict[key] = sp.augment.get_augmentation_settings_dict(key)
98 | for key, value in augment_settings_dict.items():
99 | print(key, ' : ', value)
100 |
101 | ##########################################################
102 | # Adjust Augmentation Defaults
103 | # ----------------------------
104 |
105 |
106 | ##########################################################
107 | # Adjust Add White Noise
108 | # ~~~~~~~~~~~~~~~~~~~~~~
109 | # I want the SNR of the white noise to vary among several values:
110 | # 10, 15, and 20.
111 | augment_settings_dict['add_white_noise']['snr'] = [10,15,20]
112 |
113 | ##########################################################
114 | # Adjust Pitch Decrease
115 | # ~~~~~~~~~~~~~~~~~~~~~
116 | # I found the pitch changes too exaggerated, so I will
117 | # set those to 1 instead of 2 semitones.
118 | augment_settings_dict['pitch_decrease']['num_semitones'] = 1
119 |
120 | ##########################################################
121 | # Adjust Pitch Increase
122 | # ~~~~~~~~~~~~~~~~~~~~~
123 | augment_settings_dict['pitch_increase']['num_semitones'] = 1
124 |
125 | ##########################################################
126 | # Adjust Speed Decrease
127 | # ~~~~~~~~~~~~~~~~~~~~~
128 | augment_settings_dict['speed_decrease']['perc'] = 0.1
129 |
130 | ##########################################################
131 | # Adjust Speed Increase
132 | # ~~~~~~~~~~~~~~~~~~~~~
133 | augment_settings_dict['speed_increase']['perc'] = 0.1
134 |
135 |
136 | ######################################################
137 | # Update an Augmentation Dictionary
138 | # ---------------------------------
139 | # We'll include in the dictionary the settings we want for augmentations:
140 | augmentation_all.update(
141 | dict(augment_settings_dict = augment_settings_dict))
142 |
143 |
144 | ######################################################
145 | # Train the Model
146 | # ===============
147 | # Note: disregard the warning:
148 | # WARNING: Only the power spectrum of the VTLP augmented signal can be returned due to resizing the augmentation from (56, 4401) to (79, 276)
149 | #
150 | # This is due to the hyper frequency resolution applied to the audio during
151 | # vocal-tract length perturbation, and then deresolution to bring to correct size.
152 | # The current implementation applies the deresolution to the power spectrum rather than
153 | # directly to the STFT.
154 | model_dir, history = spdl.envclassifier_extract_train(
155 | model_name = 'augment_builtin_speechcommands',
156 | audiodata_path = data_dir,
157 | augment_dict = augmentation_all,
158 | labeled_data = True,
159 | batch_size = 1,
160 | epochs = 50,
161 | patience = 5,
162 | visualize = True,
163 | vis_every_n_items = 1,
164 | **get_feats_kwargs)
165 |
166 | #############################################################
167 | # Let's plot how the model performed (on this small dataset)
168 | plt.clf()
169 | plt.plot(history.history['accuracy'])
170 | plt.plot(history.history['val_accuracy'])
171 | plt.title('model accuracy')
172 | plt.ylabel('accuracy')
173 | plt.xlabel('epoch')
174 | plt.legend(['train', 'val'], loc='upper right')
175 | plt.savefig('accuracy.png')
176 |
--------------------------------------------------------------------------------
/docs/source/examples/plot_featureprep_denoiser.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =======================================================
4 | Feature Extraction for Denoising: Clean and Noisy Audio
5 | =======================================================
6 |
7 | Extract acoustic features from clean and noisy datasets for
8 | training a denoising model, e.g. a denoising autoencoder.
9 |
10 | To see how soundpy implements this, see `soundpy.builtin.denoiser_feats`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 |
17 | #####################################################################
18 | import os, sys
19 | import inspect
20 | currentdir = os.path.dirname(os.path.abspath(
21 | inspect.getfile(inspect.currentframe())))
22 | parentdir = os.path.dirname(currentdir)
23 | parparentdir = os.path.dirname(parentdir)
24 | packagedir = os.path.dirname(parparentdir)
25 | sys.path.insert(0, packagedir)
26 |
27 | import soundpy as sp
28 | import IPython.display as ipd
29 | package_dir = '../../../'
30 | os.chdir(package_dir)
31 | sp_dir = package_dir
32 |
33 | ######################################################
34 | # Prepare for Extraction: Data Organization
35 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
36 |
37 | ######################################################
38 | # I will use a mini denoising dataset as an example
39 |
40 | # Example noisy data:
41 | data_noisy_dir = '{}../mini-audio-datasets/denoise/noisy'.format(sp_dir)
42 | # Example clean data:
43 | data_clean_dir = '{}../mini-audio-datasets/denoise/clean'.format(sp_dir)
44 | # Where to save extracted features:
45 | data_features_dir = './audiodata/example_feats_models/denoiser/'
46 |
47 | ######################################################
48 | # Choose Feature Type
49 | # ~~~~~~~~~~~~~~~~~~~
50 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'.
51 | # If you are working with speech, I suggest 'fbank', 'powspec', or 'stft'.
52 |
53 | feature_type = 'stft'
54 | sr = 22050
55 |
56 | ######################################################
57 | # Set Duration of Audio
58 | # ~~~~~~~~~~~~~~~~~~~~~
59 | # How much audio (in seconds) to use from each audio file.
60 | # The speech samples are about 3 seconds long.
61 | dur_sec = 3
62 |
63 | #######################################################################
64 | # Option 1: Built-In Functionality: soundpy does everything for you
65 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
66 |
67 | ############################################################
68 | # Define which data to use and which features to extract.
69 | # NOTE: because of the very small dataset, we will set
70 | # `perc_train` to a lower level than 0.8. (Otherwise, an error will be raised.)
71 | # Everything else is based on defaults. A feature folder with
72 | # the feature data will be created in the current working directory.
73 | # (Although, you can set this under the parameter `data_features_dir`)
74 | # `visualize` saves periodic images of the features extracted.
75 | # This is useful if you want to know what's going on during the process.
76 | perc_train = 0.6 # with larger datasets this would be around 0.8
77 | extraction_dir = sp.denoiser_feats(
78 | data_clean_dir = data_clean_dir,
79 | data_noisy_dir = data_noisy_dir,
80 | sr = sr,
81 | feature_type = feature_type,
82 | dur_sec = dur_sec,
83 | perc_train = perc_train,
84 | visualize=True);
85 | extraction_dir
86 |
87 | ################################################################
88 | # The extracted features, extraction settings applied, and
89 | # which audio files were assigned to which datasets
90 | # will be saved in the `extraction_dir` directory
91 |
92 |
93 | ############################################################
94 | # Logged Information
95 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
96 | # Let's have a look at the files in the extraction_dir. The files ending
97 | # with .npy extension contain the feature data; the .csv files contain
98 | # logged information.
99 | featfiles = list(extraction_dir.glob('*.*'))
100 | for f in featfiles:
101 | print(f.name)
102 |
103 | ############################################################
104 | # Feature Settings
105 | # ~~~~~~~~~~~~~~~~~~
106 | # Since much was conducted behind the scenes, it's nice to know how the features
107 | # were extracted, for example, the sample rate and number of frequency bins applied, etc.
108 | feat_settings = sp.utils.load_dict(
109 | extraction_dir.joinpath('log_extraction_settings.csv'))
110 | for key, value in feat_settings.items():
111 | print(key, ' ---> ', value)
112 |
113 |
--------------------------------------------------------------------------------
/docs/source/examples/plot_featureprep_envclassifier.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =====================================
4 | Feature Extraction for Classification
5 | =====================================
6 |
7 | Extract acoustic features from labeled data for
8 | training an environment or speech classifier.
9 |
10 | To see how soundpy implements this, see `soundpy.builtin.envclassifier_feats`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 |
17 |
18 | #####################################################################
19 | import os, sys
20 | import inspect
21 | currentdir = os.path.dirname(os.path.abspath(
22 | inspect.getfile(inspect.currentframe())))
23 | parentdir = os.path.dirname(currentdir)
24 | parparentdir = os.path.dirname(parentdir)
25 | packagedir = os.path.dirname(parparentdir)
26 | sys.path.insert(0, packagedir)
27 |
28 | import soundpy as sp
29 | import IPython.display as ipd
30 | package_dir = '../../../'
31 | os.chdir(package_dir)
32 | sp_dir = package_dir
33 |
34 | ######################################################
35 | # Prepare for Extraction: Data Organization
36 | # -----------------------------------------
37 |
38 | ######################################################
39 | # I will use a sample speech commands data set:
40 |
41 | ##########################################################
42 | # Designate path relevant for accessing audiodata
43 | data_dir = '{}../mini-audio-datasets/speech_commands/'.format(sp_dir)
44 |
45 | ######################################################
46 | # Choose Feature Type
47 | # ~~~~~~~~~~~~~~~~~~~
48 | # We can extract 'mfcc', 'fbank', 'powspec', and 'stft'.
49 | # If you are working with speech, I suggest 'fbank', 'powspec', or 'stft'.
50 |
51 | feature_type = 'fbank'
52 |
53 | ######################################################
54 | # Set Duration of Audio
55 | # ~~~~~~~~~~~~~~~~~~~~~
56 | # How much audio (in seconds) to use from each audio file.
57 | # The example noise and speech files are only 1 second long.
58 | dur_sec = 1
59 |
60 |
61 | #############################################################
62 | # Built-In Functionality - soundpy extracts the features for you
63 | # ---------------------------------------------------------------
64 |
65 | ############################################################
66 | # Define which data to use and which features to extract.
67 | # Everything else is based on defaults. A feature folder with
68 | # the feature data will be created in the current working directory.
69 | # (Although, you can set this under the parameter `data_features_dir`)
70 | # `visualize` saves periodic images of the features extracted.
71 | # This is useful if you want to know what's going on during the process.
72 | extraction_dir = sp.envclassifier_feats(data_dir,
73 | feature_type=feature_type,
74 | dur_sec=dur_sec,
75 | visualize=True);
76 |
77 | ################################################################
78 | # The extracted features, extraction settings applied, and
79 | # which audio files were assigned to which datasets
80 | # will be saved in the following directory:
81 | extraction_dir
82 |
83 | ############################################################
84 | # Logged Information
85 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
86 | # Let's have a look at the files in the extraction_dir. The files ending
87 | # with .npy extension contain the feature data; the .csv files contain
88 | # logged information.
89 | featfiles = list(extraction_dir.glob('*.*'))
90 | for f in featfiles:
91 | print(f.name)
92 |
93 | ############################################################
94 | # Feature Settings
95 | # ~~~~~~~~~~~~~~~~~~
96 | # Since much was conducted behind the scenes, it's nice to know how the features
97 | # were extracted, for example, the sample rate and number of frequency bins applied, etc.
98 | feat_settings = sp.utils.load_dict(
99 | extraction_dir.joinpath('log_extraction_settings.csv'))
100 | for key, value in feat_settings.items():
101 | print(key, ' ---> ', value)
102 |
103 |
104 | ############################################################
105 | # Labeled Data
106 | # ~~~~~~~~~~~~~~~~~~
107 | # These are the labels and their encoded values:
108 | encode_dict = sp.utils.load_dict(
109 | extraction_dir.joinpath('dict_encode.csv'))
110 | for key, value in encode_dict.items():
111 | print(key, ' ---> ', value)
112 |
--------------------------------------------------------------------------------
/docs/source/examples/plot_filter_out_noise.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 | """
4 | ===========================
5 | Filter Out Background Noise
6 | ===========================
7 |
8 | Filter out background noise from noisy speech signals.
9 |
10 | To see how soundpy implements this, see `soundpy.builtin.filtersignal`.
11 |
12 | As a general note for filtering, the Wiener Filter is the default filter for soundpy. It seems to filter signals more consistently than the Band Spectral Subtraction Filter.
13 | """
14 |
15 |
16 | ###############################################################################################
17 | #
18 |
19 |
20 | #####################################################################
21 |
22 | # Let's import soundpy, and ipd for playing audio data
23 | import soundpy as sp
24 | import IPython.display as ipd
25 |
26 |
27 | ######################################################
28 | # Define the noisy and clean speech audio files.
29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
30 | # Note: these files are available in the soundpy repo.
31 | # Designate path relevant for accessing audiodata
32 | sp_dir = '../../../'
33 |
34 | ##########################################################
35 | # Noise and speech samples:
36 | noise = '{}audiodata/background_samples/traffic.wav'.format(sp_dir)
37 | noise = sp.string2pathlib(noise)
38 | speech = '{}audiodata/python.wav'.format(sp_dir)
39 | speech = sp.utils.string2pathlib(speech)
40 |
41 | ##########################################################
42 | # For filtering, we will set the sample rate to be quite high:
43 | sr = 48000
44 |
45 | ##########################################################
46 | # Create a noisy speech signal at an SNR of 10
47 | noisy, snr_measured = sp.dsp.add_backgroundsound(
48 | speech,
49 | noise,
50 | sr = sr,
51 | snr = 10,
52 | total_len_sec = 2,
53 | pad_mainsound_sec = 0.5)
54 |
55 | ##########################################################
56 | # Hear and see the noisy speech
57 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
58 |
59 | ipd.Audio(noisy,rate=sr)
60 |
61 | ##########################################################
62 | sp.plotsound(noisy, sr=sr, feature_type='signal',
63 | title = 'Noisy Speech', subprocess=True)
64 |
65 |
66 | ##########################################################
67 | # Hear and see the clean speech
68 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
69 | s, sr = sp.loadsound(speech, sr=sr)
70 | ipd.Audio(s,rate=sr)
71 |
72 | ##########################################################
73 | sp.plotsound(s, sr=sr, feature_type='signal',
74 | title = 'Clean Speech', subprocess=True)
75 |
76 |
77 | ##########################################################
78 | # Filter the noisy speech
79 | # ^^^^^^^^^^^^^^^^^^^^^^^
80 |
81 | ##########################################################
82 | # Wiener Filter
83 | # ~~~~~~~~~~~~~
84 |
85 | ##########################################################
86 | # Let's filter with a Wiener filter:
87 | noisy_wf, sr = sp.filtersignal(noisy,
88 | sr = sr,
89 | filter_type = 'wiener') # default
90 |
91 | ##########################################################
92 | ipd.Audio(noisy_wf,rate=sr)
93 |
94 | ##########################################################
95 | sp.plotsound(noisy_wf, sr = sr, feature_type = 'signal',
96 | title = 'Noisy Speech: Wiener Filter',
97 | subprocess=True)
98 |
99 | #################################################################
100 | # Wiener Filter with Postfilter
101 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
102 |
103 | ##########################################################
104 | # Let's filter with a Wiener filter and postfilter
105 | noisy_wfpf, sr = sp.filtersignal(noisy,
106 | sr = sr,
107 | filter_type = 'wiener',
108 | apply_postfilter = True)
109 |
110 | ##########################################################
111 | ipd.Audio(noisy_wfpf,rate=sr)
112 |
113 | ##########################################################
114 | sp.plotsound(noisy_wfpf, sr=sr, feature_type = 'signal',
115 | title = 'Noisy Speech: Wiener Filter with Postfilter',
116 | subprocess=True)
117 |
118 | #################################################################
119 | # Band Spectral Subtraction
120 | # ~~~~~~~~~~~~~~~~~~~~~~~~~
121 |
122 | ##########################################################
123 | # Let's filter using band spectral subtraction
124 | noisy_bs, sr = sp.filtersignal(noisy,
125 | sr = sr,
126 | filter_type = 'bandspec')
127 |
128 | ##########################################################
129 | ipd.Audio(noisy_bs,rate=sr)
130 |
131 | ##########################################################
132 | sp.plotsound(noisy_bs, sr = sr, feature_type = 'signal',
133 | title = 'Noisy Speech: Band Spectral Subtraction',
134 | subprocess=True)
135 |
136 |
137 | #################################################################
138 | # Band Spectral Subtraction with Postfilter
139 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
140 |
141 | #########################################################################
142 | # Finally, let's filter using band spectral subtraction with a postfilter
143 | noisy_bspf, sr = sp.filtersignal(noisy,
144 | sr = sr,
145 | filter_type = 'bandspec',
146 | apply_postfilter = True)
147 |
148 | ##########################################################
149 | ipd.Audio(noisy_bspf,rate=sr)
150 |
151 | ##########################################################
152 | sp.plotsound(noisy_bspf, sr = sr, feature_type = 'signal',
153 | title = 'Noisy Speech: Band Spectral Subtraction with Postfilter',
154 | subprocess=True)
155 |
156 |
157 | ##########################################################
158 | # Filter: increase the scale
159 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^
160 |
161 | ##########################################################
162 | # Let's filter with a Wiener filter:
163 | filter_scale = 5
164 | noisy_wf, sr = sp.filtersignal(noisy,
165 | sr=sr,
166 | filter_type = 'wiener',
167 | filter_scale = filter_scale)
168 |
169 | ##########################################################
170 | # Wiener Filter
171 | # ~~~~~~~~~~~~~
172 |
173 | ##########################################################
174 | ipd.Audio(noisy_wf,rate=sr)
175 |
176 | ##########################################################
177 | sp.plotsound(noisy_wf, sr = sr, feature_type = 'signal',
178 | title = 'Noisy Speech: Wiener Filter Scale {}'.format(filter_scale),
179 | subprocess=True)
180 |
181 | #################################################################
182 | # Wiener Filter with Postfilter
183 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
184 |
185 | ##########################################################
186 | # Let's filter with a Wiener filter and postfilter
187 | noisy_wfpf, sr = sp.filtersignal(noisy,
188 | sr = sr,
189 | filter_type = 'wiener',
190 | apply_postfilter = True,
191 | filter_scale = filter_scale)
192 |
193 | ##########################################################
194 | ipd.Audio(noisy_wfpf,rate = sr)
195 |
196 | ##########################################################
197 | sp.plotsound(noisy_wfpf, sr = sr, feature_type = 'signal',
198 | title = 'Noisy Speech: Wiener Filter with Postfilter Scale {}'.format(filter_scale),
199 | subprocess=True)
200 |
--------------------------------------------------------------------------------
/docs/source/examples/plot_implement_denoiser.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =================================
4 | Implement a Denoising Autoencoder
5 | =================================
6 |
7 | Implement denoising autoencoder to denoise a noisy speech signal.
8 |
9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_run`.
10 | """
11 |
12 |
13 | ############################################################################################
14 | #
15 |
16 | #####################################################################
17 | # Let's import soundpy and other packages
18 | import soundpy as sp
19 | import numpy as np
20 | # for playing audio in this notebook:
21 | import IPython.display as ipd
22 |
23 | #####################################################################
24 | # As well as the deep learning component of soundpy
25 | from soundpy import models as spdl
26 |
27 | ######################################################
28 | # Prepare for Implementation: Data Organization
29 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
30 |
31 | ##########################################################
32 | # Set path relevant for audio data for this example
33 | sp_dir = '../../../'
34 |
35 | ######################################################
36 | # Set model pathway
37 | # ~~~~~~~~~~~~~~~~~
38 | # Currently, this expects a model saved with weights, with a .h5 extension.
39 | # (See `model` below)
40 |
41 | ######################################################
42 | # The soundpy repo offers a pre-trained denoiser, which we'll use.
43 | model = '{}audiodata/models/'.format(sp_dir)+\
44 | 'denoiser/example_denoiser_stft.h5'
45 | # ensure is a pathlib.PosixPath object
46 | print(model)
47 | model = sp.utils.string2pathlib(model)
48 | model_dir = model.parent
49 |
50 | #########################################################
51 | # What is in this folder?
52 | files = list(model_dir.glob('*.*'))
53 | for f in files:
54 | print(f.name)
55 |
56 | ######################################################
57 | # Provide dictionary with feature extraction settings
58 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 |
60 | #########################################################
61 | # If soundpy extracts features for you, a 'log_extraction_settings.csv'
62 | # file will be saved, which includes relevant feature settings for implementing
63 | # the model; see `soundpy.feats.save_features_datasets`
64 | feat_settings = sp.utils.load_dict(
65 | model_dir.joinpath('log_extraction_settings.csv'))
66 | for key, value in feat_settings.items():
67 | print(key, ' --> ', value)
68 | # convert values that were loaded as strings back to their original Python types
69 | import ast
70 | try:
71 | feat_settings[key] = ast.literal_eval(value)
72 | except ValueError:
73 | pass
74 | except SyntaxError:
75 | pass
76 |
77 | #########################################################
78 | # For the purposes of plotting, let's use some of the settings defined:
79 | feature_type = feat_settings['feature_type']
80 | sr = feat_settings['sr']
81 |
82 | ######################################################
83 | # Provide new audio for the denoiser to denoise!
84 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
85 |
86 | #########################################################
87 | # We'll use sample speech from the soundpy repo:
88 | speech = sp.string2pathlib('{}audiodata/python.wav'.format(sp_dir))
89 | s, sr = sp.loadsound(speech, sr=sr)
90 |
91 | #########################################################
92 | # Let's add some white noise (10 SNR)
93 | s_n = sp.augment.add_white_noise(s, sr=sr, snr=10)
94 |
95 | ##############################################################
96 | # What does the noisy audio sound like?
97 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
98 | ipd.Audio(s_n,rate=sr)
99 |
100 | ##############################################################
101 | # What does the noisy audio look like?
102 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
103 | sp.plotsound(s_n, sr = sr, feature_type='signal', subprocess=True)
104 |
105 | ##############################################################
106 | # What does the clean audio sound like?
107 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
108 | ipd.Audio(s,rate=sr)
109 |
110 | ##############################################################
111 | # What does the clean audio look like?
112 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
113 | sp.plotsound(s, sr = sr, feature_type='signal', subprocess=True)
114 |
115 | #########################################################################
116 | # Built-In Denoiser Functionality
117 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
118 |
119 | ##############################################################
120 | # We just need to feed the model path, the noisy audio samples, and
121 | # the feature settings dictionary we looked at above.
122 | y, sr = spdl.denoiser_run(model, s_n, feat_settings)
123 |
124 | ##########################################################
125 | # How does the output sound?
126 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~
127 | ipd.Audio(y,rate=sr)
128 |
129 | ##########################################################
130 | # How does the output look?
131 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
132 | sp.plotsound(y, sr=sr, feature_type = feature_type, subprocess=True)
133 |
134 | ##########################################################
135 | # How do the features compare?
136 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
137 |
138 | ##########################################################
139 | # STFT features of the noisy input speech:
140 | sp.plotsound(s_n, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
141 | title = 'Noisy input: STFT features', subprocess=True)
142 |
143 | ##########################################################
144 | # STFT features of the output
145 | sp.plotsound(y, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
146 | title = 'Denoiser Output: STFT features', subprocess=True)
147 |
148 | ##########################################################
149 | # STFT features of the clean version of the audio:
150 | sp.plotsound(s, sr=sr, feature_type = 'stft', energy_scale = 'power_to_db',
151 | title = 'Clean "target" audio: STFT features', subprocess=True)
152 |
153 |
154 | ##########################################################
155 | # It's not perfect but for a pretty simple implementation, the noise is gone
156 | # and you can hear the person speaking. Pretty cool!
157 |
--------------------------------------------------------------------------------
/docs/source/examples/plot_signals_and_features.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =======================
4 | Create and Plot Signals
5 | =======================
6 |
7 | Create and plot signals / noise; combine them at a specific SNR.
8 |
9 | To see how soundpy implements this, see `soundpy.dsp.generate_sound`,
10 | `soundpy.dsp.generate_noise` and `soundpy.dsp.add_backgroundsound`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 |
17 | #####################################################################
18 | # Let's import soundpy
19 | import soundpy as sp
20 |
21 | ###########################################################################
22 | # Create a Signal
23 | # ^^^^^^^^^^^^^^^
24 |
25 | ########################################################################
26 | # First let's set what sample rate we want to use
27 | sr = 44100
28 |
29 |
30 | #########################################################################
31 | # Let's create a signal of 10 Hz
32 | sig1_hz = 10
33 | sig1, sr = sp.generate_sound(freq=sig1_hz, amplitude = 0.4, sr=sr, dur_sec=1)
34 | sp.plotsound(sig1, sr=sr, feature_type = 'signal',
35 | title = 'Signal: {} Hz'.format(sig1_hz), subprocess=True)
36 |
37 |
38 | #########################################################################
39 | # Let's create a signal of 20 Hz
40 | sig2_hz = 20
41 | sig2, sr = sp.generate_sound(freq=sig2_hz, amplitude= 0.4, sr=sr, dur_sec=1)
42 | sp.plotsound(sig2, sr=sr, feature_type = 'signal',
43 | title = 'Signal: {} Hz'.format(sig2_hz), subprocess=True)
44 |
45 | ###########################################################################
46 | # Combine Signals
47 | # ^^^^^^^^^^^^^^^
48 |
49 |
50 | #########################################################################
51 | # Add them together and see what they look like:
52 | sig3 = sig1 + sig2
53 | sp.plotsound(sig3, sr=sr, feature_type = 'signal',
54 | title='Mixed Signals: {} Hz + {} Hz'.format(sig1_hz, sig2_hz),
55 | subprocess=True)
56 |
57 |
58 | ##########################################################################
59 | # Generate Pseudo-Random Noise
60 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
61 |
62 |
63 | #########################################################################
64 | # Create noise to add to the signal:
65 | noise = sp.generate_noise(len(sig3), amplitude=0.02, random_seed=40)
66 | sp.plotsound(noise, sr=sr, feature_type = 'signal',
67 | title='Random Noise', subprocess=True)
68 |
69 | ###########################################################################
70 | # Control SNR: Adding a Background Sound
71 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
72 |
73 | #########################################################################
74 | # Add noise at signal-to-noise ratio of 40
75 | sig_noisy, snr = sp.dsp.add_backgroundsound(
76 | audio_main = sig3,
77 | audio_background = noise,
78 | sr = sr,
79 | snr = 40,
80 | clip_at_zero = False)
81 |
82 | # keep energy between 1 and -1
83 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
84 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 40 SNR',
85 | subprocess=True)
86 |
87 | #########################################################################
88 | # Add noise at signal-to-noise ratio of 20
89 | sig_noisy, snr = sp.dsp.add_backgroundsound(
90 | audio_main = sig3,
91 | audio_background = noise,
92 | sr = sr,
93 | snr = 20)
94 | # keep energy between 1 and -1
95 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
96 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 20 SNR',
97 | subprocess=True)
98 |
99 | #########################################################################
100 | # Add noise at signal-to-noise ratio of 10
101 | sig_noisy, snr = sp.dsp.add_backgroundsound(
102 | audio_main = sig3,
103 | audio_background = noise,
104 | sr = sr,
105 | snr = 10)
106 | # keep energy between 1 and -1
107 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
108 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 10 SNR',
109 | subprocess=True)
110 |
111 | #########################################################################
112 | # Add noise at signal-to-noise ratio of 0
113 | sig_noisy, snr = sp.dsp.add_backgroundsound(
114 | audio_main = sig3,
115 | audio_background = noise,
116 | sr = sr,
117 | snr = 0)
118 | # keep energy between 1 and -1
119 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
120 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: 0 SNR',
121 | subprocess=True)
122 |
123 |
124 | #########################################################################
125 | # Add noise at signal-to-noise ratio of -10
126 | sig_noisy, snr = sp.dsp.add_backgroundsound(
127 | audio_main = sig3,
128 | audio_background = noise,
129 | sr = sr,
130 | snr = -10)
131 | # keep energy between 1 and -1
132 | sig_noisy = sp.dsp.scalesound(sig_noisy, max_val=1)
133 | sp.plotsound(sig_noisy, sr=sr, feature_type = 'signal', title='Signal + Noise: -10 SNR',
134 | subprocess=True)
135 |
--------------------------------------------------------------------------------
/docs/source/examples/plot_train_classifier.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | ============================
4 | Train an Acoustic Classifier
5 | ============================
6 |
7 | Train an acoustic classifier on speech or noise features.
8 |
9 | To see how soundpy implements this, see `soundpy.models.builtin.envclassifier_train`.
10 | """
11 |
12 | ###############################################################################################
13 | #
14 | import os, sys
15 | import inspect
16 | currentdir = os.path.dirname(os.path.abspath(
17 | inspect.getfile(inspect.currentframe())))
18 | parentdir = os.path.dirname(currentdir)
19 | parparentdir = os.path.dirname(parentdir)
20 | packagedir = os.path.dirname(parparentdir)
21 | sys.path.insert(0, packagedir)
22 |
23 | import matplotlib.pyplot as plt
24 | import IPython.display as ipd
25 | package_dir = '../../../'
26 | os.chdir(package_dir)
27 | sp_dir = package_dir
28 |
29 |
30 | #####################################################################
31 | # Let's import soundpy for handling sound
32 | import soundpy as sp
33 | #####################################################################
34 | # As well as the deep learning component of soundpy
35 | from soundpy import models as spdl
36 |
37 |
38 | ######################################################
39 | # Prepare for Training: Data Organization
40 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
41 |
42 | ##########################################################
43 | # Set path relevant for audio data for this example
44 |
45 | ######################################################
46 | # I will load previously extracted features (from the Speech Commands Dataset)
47 | # See `soundpy.feats.save_features_datasets` or `soundpy.builtin.envclassifier_feats`
48 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\
49 | 'envclassifier/example_feats_fbank/'
50 |
51 | #########################################################
52 | # What is in this folder?
53 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)
54 | files = list(feature_extraction_dir.glob('*.*'))
55 | for f in files:
56 | print(f.name)
57 |
58 | #########################################################
59 | # The .npy files contain the features themselves, in train, validation, and
60 | # test datasets:
61 | files = list(feature_extraction_dir.glob('*.npy'))
62 | for f in files:
63 | print(f.name)
64 |
65 | #########################################################
66 | # The .csv files contain information about how the features were extracted
67 | files = list(feature_extraction_dir.glob('*.csv'))
68 | for f in files:
69 | print(f.name)
70 |
71 | #########################################################
72 | # We'll have a look at which features were extracted and other settings:
73 | feat_settings = sp.utils.load_dict(
74 | feature_extraction_dir.joinpath('log_extraction_settings.csv'))
75 | for key, value in feat_settings.items():
76 | print(key, ' --> ', value)
77 |
78 | #########################################################
79 | # For more about these settings, see `soundpy.feats.save_features_datasets`.
80 |
81 | #########################################################
82 | # We'll have a look at the audio files that were assigned
83 | # to the train, val, and test datasets.
84 | audio_datasets = sp.utils.load_dict(
85 | feature_extraction_dir.joinpath('dataset_audiofiles.csv'))
86 | count = 0
87 | for key, value in audio_datasets.items():
88 | print(key, ' --> ', value)
89 | count += 1
90 | if count > 5:
91 | break
92 |
93 | #############################################################
94 | # Built-In Functionality: soundpy does everything for you
95 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
96 | # For more about this function, see `soundpy.models.builtin.envclassifier_train`.
97 |
98 | #############################################################
99 | model_dir, history = spdl.envclassifier_train(
100 | feature_extraction_dir = feature_extraction_dir,
101 | epochs = 10,
102 | patience = 5)
103 |
104 | #############################################################
105 | # Where the model and logs are located:
106 | model_dir
107 |
108 | #############################################################
109 | # Let's plot how the model performed (on this mini dataset)
110 | import matplotlib.pyplot as plt
111 | plt.clf()
112 | plt.plot(history.history['accuracy'])
113 | plt.plot(history.history['val_accuracy'])
114 | plt.title('model accuracy')
115 | plt.ylabel('accuracy')
116 | plt.xlabel('epoch')
117 | plt.legend(['train', 'val'], loc='upper right')
118 | plt.savefig('accuracy.png')
119 |
--------------------------------------------------------------------------------
/docs/source/examples/plot_train_denoiser.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | =============================
4 | Train a Denoising Autoencoder
5 | =============================
6 |
7 | Train a denoising autoencoder with clean and noisy acoustic features.
8 |
9 | To see how soundpy implements this, see `soundpy.models.builtin.denoiser_train`,
10 | `soundpy.builtin.denoiser_feats` and `soundpy.builtin.create_denoise_data`.
11 | """
12 |
13 |
14 | ###############################################################################################
15 | #
16 | import os, sys
17 | import inspect
18 | currentdir = os.path.dirname(os.path.abspath(
19 | inspect.getfile(inspect.currentframe())))
20 | parentdir = os.path.dirname(currentdir)
21 | parparentdir = os.path.dirname(parentdir)
22 | packagedir = os.path.dirname(parparentdir)
23 | sys.path.insert(0, packagedir)
24 |
25 | import matplotlib.pyplot as plt
26 | import IPython.display as ipd
27 | package_dir = '../../../'
28 | os.chdir(package_dir)
29 | sp_dir = package_dir
30 |
31 |
32 | #####################################################################
33 | # Let's import soundpy for handling sound
34 | import soundpy as sp
35 | #####################################################################
36 | # As well as the deep learning component of soundpy
37 | from soundpy import models as spdl
38 |
39 |
40 | ######################################################
41 | # Prepare for Training: Data Organization
42 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
43 |
44 | ##########################################################
45 | # Designate path relevant for accessing audiodata
46 |
47 |
48 | ######################################################
49 | # I will load previously extracted features (sample data), see `soundpy.feats.save_features_datasets` or `soundpy.builtin.denoiser_feats`
50 | feature_extraction_dir = '{}audiodata2/example_feats_models/'.format(sp_dir)+\
51 | 'denoiser/example_feats_fbank/'
52 |
53 | #########################################################
54 | # What is in this folder?
55 | feature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)
56 | files = list(feature_extraction_dir.glob('*.*'))
57 | for f in files:
58 | print(f.name)
59 |
60 | #########################################################
61 | # The .npy files contain the features themselves, in train, validation, and
62 | # test datasets:
63 | files = list(feature_extraction_dir.glob('*.npy'))
64 | for f in files:
65 | print(f.name)
66 |
67 | #########################################################
68 | # The .csv files contain information about how the features were extracted
69 | files = list(feature_extraction_dir.glob('*.csv'))
70 | for f in files:
71 | print(f.name)
72 |
73 | #########################################################
74 | # We'll have a look at which features were extracted and other settings:
75 | feat_settings = sp.utils.load_dict(
76 | feature_extraction_dir.joinpath('log_extraction_settings.csv'))
77 | for key, value in feat_settings.items():
78 | print(key, ' --> ', value)
79 |
80 | #########################################################
81 | # For more about these settings, see `soundpy.feats.save_features_datasets`.
82 |
83 | #########################################################
84 | # We'll have a look at the audio files that were assigned
85 | # to the train, val, and test datasets.
86 | audio_datasets = sp.utils.load_dict(
87 | feature_extraction_dir.joinpath('audiofiles_datasets_clean.csv'))
88 | count = 0
89 | for key, value in audio_datasets.items():
90 | print(key, ' --> ', value)
91 | count += 1
92 | if count > 5:
93 | break
94 |
95 | #############################################################
96 | # Built-In Functionality: soundpy does everything for you
97 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
98 | # For more about this, see `soundpy.models.builtin.denoiser_train`.
99 |
100 | #############################################################
101 | model_dir, history = spdl.denoiser_train(
102 | feature_extraction_dir = feature_extraction_dir,
103 | epochs = 10)
104 |
105 | #########################################################
106 |
107 |
108 | #############################################################
109 | # Where the model and logs are located:
110 | model_dir
111 |
112 |
113 | #############################################################
114 | # Let's plot how the model performed (on this mini dataset)
115 | import matplotlib.pyplot as plt
116 | plt.plot(history.history['loss'])
117 | plt.plot(history.history['val_loss'])
118 | plt.title('model loss')
119 | plt.ylabel('loss')
120 | plt.xlabel('epoch')
121 | plt.legend(['train', 'val'], loc='upper right')
122 | plt.savefig('loss.png')
123 |
--------------------------------------------------------------------------------
/docs/source/exceptions.rst:
--------------------------------------------------------------------------------
1 |
2 | Customized Errors
3 | -----------------
4 |
5 | .. automodule:: soundpy.exceptions
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/feats.rst:
--------------------------------------------------------------------------------
1 |
2 | Extract and manipulate audio features
3 | -------------------------------------
4 |
5 | .. automodule:: soundpy.feats
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/files.rst:
--------------------------------------------------------------------------------
1 |
2 | Working with audio files
3 | ------------------------
4 |
5 | .. automodule:: soundpy.files
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/filters.rst:
--------------------------------------------------------------------------------
1 |
2 | Filters: Wiener and Band Spectral Subtraction
3 | ---------------------------------------------
4 |
5 | .. automodule:: soundpy.filters
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
10 | .. autoclass:: soundpy.filters.FilterSettings
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | .. automethod:: __init__
16 |
17 | .. autoclass:: soundpy.filters.Filter
18 | :members:
19 | :undoc-members:
20 | :show-inheritance:
21 |
22 | .. automethod:: __init__
23 |
24 |
25 | .. autoclass:: soundpy.filters.WienerFilter
26 | :members:
27 | :undoc-members:
28 | :show-inheritance:
29 |
30 | .. automethod:: __init__
31 |
32 |
33 | .. autoclass:: soundpy.filters.BandSubtraction
34 | :members:
35 | :undoc-members:
36 | :show-inheritance:
37 |
38 | .. automethod:: __init__
39 |
40 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. SoundPy documentation master file, created by
2 | sphinx-quickstart on Mon Jun 15 11:57:18 2020.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | SoundPy v0.1.0a3
7 | ================
8 |
9 | Welcome to the docs!
10 | --------------------
11 |
12 |
13 | SoundPy is a research based Python package_ for exploring and experimenting with sound and deep learning. NOTE: SoundPy is in alpha stage of development; please forgive any bugs that pop up, and also feel free/encouraged to open an issue_.
14 |
15 | Those who might find this useful:
16 |
17 | * speech and sound enthusiasts
18 | * digital signal processing / mathematics / physics / acoustics enthusiasts
19 | * deep learning enthusiasts
20 | * researchers
21 | * linguists
22 | * psycholinguists
23 |
24 | The main goal of SoundPy is to provide the code and functionality with more context via visualization, research, and mathematics. Most of the resources used to build the functionality stem from publicly available research and datasets.
25 |
26 | As it covers quite a large range, from audio file conversion to implementation of trained neural networks, the purpose of SoundPy is not to be the perfect implementation of all functions (although that is also a goal :P ), but rather a peek into how they *can* be implemented, hopefully offering people a foundation for trying out different ways of implementation (feature extraction, building neural networks, etc.).
27 |
28 | This project is still in the beginning stages and has a lot of room for growth, especially with contributors having a background / knowledge in data science, computer science, machine and deep learning, physics, acoustics, or dsp. Contributors from other backgrounds are also welcome! If you'd like SoundPy to do something it doesn't, try making it or create an issue.
29 |
30 | .. _PyPI: https://pypi.org/project/soundpy/
31 |
32 | .. _package: https://github.com/a-n-rose/Python-Sound-Tool/tree/development
33 |
34 | .. _issue: https://github.com/a-n-rose/Python-Sound-Tool/issues
35 |
36 | .. toctree::
37 | :maxdepth: 2
38 |
39 | example_cases.rst
40 | readme.rst
41 |
42 |
43 | .. toctree::
44 | :maxdepth: 1
45 |
46 | changelog.rst
47 |
48 | * :ref:`genindex`
49 | * :ref:`modindex`
50 | * :ref:`search`
51 |
52 | :Author:
53 | Aislyn Rose
54 |
55 | rose.aislyn.noelle@gmail.com
56 |
57 | webpage_
58 |
59 | github_
60 |
61 | .. _webpage: https://a-n-rose.github.io/
62 |
63 | .. _github : https://github.com/a-n-rose
64 |
--------------------------------------------------------------------------------
/docs/source/model_dataprep.rst:
--------------------------------------------------------------------------------
1 |
2 | Feeding large datasets to models
3 | --------------------------------
4 |
5 | .. autoclass:: soundpy.models.dataprep.Generator
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
10 | .. automethod:: __init__
11 |
12 |
13 | .. automodule:: soundpy.models.dataprep
14 | :members:
15 | :undoc-members:
16 | :show-inheritance:
17 |
--------------------------------------------------------------------------------
/docs/source/modelsetup.rst:
--------------------------------------------------------------------------------
1 |
2 | Additional model setup (e.g. Early Stopping)
3 | --------------------------------------------
4 |
5 | .. automodule:: soundpy.models.modelsetup
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/modules.rst:
--------------------------------------------------------------------------------
1 | ==============================
2 | SoundPy Functionality v0.1.0a3
3 | ==============================
4 |
5 | .. include:: builtin_sp.rst
6 |
7 | .. include:: builtin_spdl.rst
8 |
9 | .. include:: augment.rst
10 |
11 | .. include:: files.rst
12 |
13 | .. include:: datasets.rst
14 |
15 | .. include:: dsp.rst
16 |
17 | .. include:: filters.rst
18 |
19 | .. include:: feats.rst
20 |
21 | .. include:: template_models.rst
22 |
23 | .. include:: modelsetup.rst
24 |
25 | .. include:: model_dataprep.rst
26 |
27 | .. include:: utils.rst
28 |
29 | .. include:: exceptions.rst
30 |
--------------------------------------------------------------------------------
/docs/source/readme.rst:
--------------------------------------------------------------------------------
1 | .. include:: modules.rst
2 |
3 |
4 |
--------------------------------------------------------------------------------
/docs/source/template_models.rst:
--------------------------------------------------------------------------------
1 | Template deep neural networks
2 | -----------------------------
3 |
4 | .. automodule:: soundpy.models.template_models
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
--------------------------------------------------------------------------------
/docs/source/utils.rst:
--------------------------------------------------------------------------------
1 |
2 | Other useful non-specific functionality
3 | ---------------------------------------
4 |
5 | .. automodule:: soundpy.utils
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/source/versions.rst:
--------------------------------------------------------------------------------
1 | ******************************************
2 | SoundPy Versions Available as PyPI Package
3 | ******************************************
4 |
5 | .. toctree::
6 | :maxdepth: 1
7 |
8 | 0.1.0a2/index.rst
9 |
10 | 0.1.0a3/index.rst
11 |
12 |
--------------------------------------------------------------------------------
/new_version_updates.md:
--------------------------------------------------------------------------------
1 | # Updates of v0.1.0a3 release:
2 |
3 | ## Updates
4 | - don't use librosa for feature extraction anymore. But compatible with previous versions.
5 | - parameter: frames_per_sample and context_window, with deprecation warning
6 | Just remove these parameters from feature extraction and limit to generators. Otherwise too messy and complex
7 | - soundpy.models.builtin.implement_denoiser() raise warning if cleaned features cannot be
8 | converted to raw audio samples.
9 | - BUG FIX: soundpy.feats.plot can now be used from within generator using backend Agg and
10 | then switch to Tkinker backend using use_tkinker parameter for normal use outside of training.
11 | - require additional tensors to be added to the desired shape and then supplied to generator to make shape process more explicit in generator.
12 |
13 | changed parameter (Generator) normalized to normalize (opposite bool); removed add_tensor_last parameter, adjusted grayscale2color sections: can be applied to 2D data; set sr default to 22050
14 |
15 | - Got the augment cnn builtin functionality to run with pre-trained features.. needs cleaning
16 | - got plotsound, plot vad, and plot dom freq, to work with stereo sound
17 |
18 | Removing from envclassifier_extract_train:
19 | dataset_dict = None,
20 | num_labels = None,
21 |
22 |
23 | ## Updates of v0.1.0a2 release:
24 |
25 | ### Updated Dependencies
26 | - Updated dependencies to newest versions still compatible with Tensorflow 2.1.0
27 | - Note: bug in training with generators occurs with Tensorflow 2.2.0+. Models trained via generators fail to learn. Therefore, Tensorflow is limited to version 2.1.0 until that bug is fixed.
28 |
29 | ### GPU option added
30 | - provide instructions for running Docker image for GPU
31 |
32 | ### soundpy.dsp.vad
33 | - add `use_beg_ms` parameter: improved VAD recognition of silences post speech.
34 | - raise warning for sample rates lower than 44100 Hz. VAD seems to fail at lower sample rates.
35 |
36 | ### soundpy.feats.get_vad_samples and soundpy.feats.get_vad_stft
37 | - moved from dsp module to the feats module
38 | - add `extend_window_ms` parameter: can extend VAD window if desired. Useful in higher SNR environments.
39 | - raise warning for sample rates lower than 44100 Hz. VAD seems to fail at lower sample rates.
40 |
41 | ### added soundpy.feats.get_samples_clipped and soundpy.feats.get_stft_clipped
42 | - another option for VAD
43 | - clips beginning and ending of audio data where high energy sound starts and ends.
44 |
45 | ### soundpy.models.dataprep.GeneratorFeatExtraction
46 | - can extract and augment features from audio files as each audio file fed to model.
47 | - example can be viewed: soundpy.models.builtin.envclassifier_extract_train
48 | - note: still very experimental
49 |
50 | ### soundpy.dsp.add_backgroundsound
51 | - improvements in the smoothness of the added signal.
52 | - soundpy.dsp.clip_at_zero
53 | - improved soundpy.dsp.vad and soundpy.feats.get_vad_stft
54 |
55 | ### soundpy.feats.normalize
56 | - can use it: soundpy.normalize (don't need to remember dsp or feats)
57 |
58 | ### soundpy.dsp.remove_dc_bias
59 | - implemented in soundpy.files.loadsound() and soundpy.files.savesound()
60 | - vastly improves the ability to work with and combine signals.
61 |
62 | ### soundpy.dsp.clip_at_zero
63 | - clips beginning and ending audio at zero crossings (at negative to positive zero crossings)
64 | - useful when concatenating signals
65 | - useful for removing clicks at beginning or ending of audio signals
66 |
67 | ### soundpy.dsp.apply_sample_length
68 | - can now mirror the sound as a form of sound extension with parameter `mirror_sound`.
69 |
70 | ### Removed soundpy_online (and therefore mybinder as well)
71 | - for the time being, this is too much work to keep up. Eventually plan on bringing this back in a more maintainable manner.
72 |
73 | ### Added stereo sound functionality to the following functions:
74 | - soundpy.dsp.add_backgroundsound
75 | - soundpy.dsp.clip_at_zero
76 | - soundpy.dsp.calc_fft
77 | - soundpy.feats.get_stft
78 | - soundpy.feats.get_vad_stft
79 |
80 | ### New functions related to stereo sound
81 | - soundpy.dsp.ismono for checking if a signal is mono or stereo
82 | - soundpy.dsp.average_channels for averaging amplitude in all channels (e.g. identifying when energetic sounds start / end: want to consider all channels)
83 | - soundpy.dsp.add_channels for adding additional channels if needed (e.g. for applying a 'hann' or 'hamming' window to stereo sound)
84 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow>=2.1.0
2 | numpy
3 | scipy
4 | scikit-learn
5 | librosa
6 | python-speech-features
7 | matplotlib
8 | soundfile
9 | numba
10 | scikit-image>=0.17.2
11 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import pathlib
from setuptools import setup, find_packages

# The directory containing this file
HERE = pathlib.Path(__file__).parent

# The text of the README file (explicit encoding so the build does not depend
# on the platform's default codec)
README = (HERE / "README.md").read_text(encoding="utf-8")

# Install requirements, one per line. The file is resolved relative to this
# script (like the README) so that building from another working directory
# still works; blank lines and comment lines are skipped.
dependencies = [
    line.strip()
    for line in (HERE / "requirements.txt").read_text(encoding="utf-8").splitlines()
    if line.strip() and not line.strip().startswith("#")
]

# This call to setup() does all the work
setup(
    name="soundpy",
    version="0.1.0a3",
    description="A research-based framework for exploring sound as well as machine learning in the context of sound.",
    long_description=README,
    long_description_content_type="text/markdown",
    url="https://github.com/a-n-rose/Python-Sound-Tool",
    author="Aislyn Rose",
    author_email="rose.aislyn.noelle@gmail.com",
    license="AGPL-3.0",
    classifiers=[
        "License :: OSI Approved :: GNU Affero General Public License v3",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.8",
    ],
    packages=find_packages(exclude=("tests","docs", "jupyter_notebooks")),
    include_package_data=True,
    install_requires=dependencies,
    python_requires=">=3.6.9",
)
36 |
--------------------------------------------------------------------------------
/soundpy/__init__.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | from . import utils
3 | from . import feats
4 | from . import files
5 | from . import datasets
6 | from . import filters
7 | from . import dsp
8 | from . import builtin
9 | from . import exceptions as errors
10 | from . import augment
11 | from .utils import check_dir, string2pathlib
12 | from .files import loadsound, savesound
13 | from .feats import plotsound, normalize
14 | from .filters import WienerFilter, BandSubtraction
15 | from .dsp import generate_sound, generate_noise
16 | from .builtin import envclassifier_feats, denoiser_feats, filtersignal
17 |
# Names exported by `from soundpy import *`. Every entry must be bound in this
# module: a name listed here but never imported (as 'playsound' previously was)
# makes the star-import raise AttributeError.
__all__=['utils', 'feats', 'filters', 'WienerFilter', 'BandSubtraction',
         'filtersignal', 'dsp','errors', 'plotsound', 'loadsound', 'savesound',
         'datasets', 'envclassifier_feats', 'denoiser_feats', 'generate_sound',
         'generate_noise', 'builtin', 'augment', 'check_dir', 'string2pathlib',
         'normalize']
23 |
--------------------------------------------------------------------------------
/soundpy/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/soundpy/__init__.pyc
--------------------------------------------------------------------------------
/soundpy/exceptions.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`soundpy.exceptions` module includes customized errors.
3 | """
4 |
def notsufficientdata_error(numtrain, numval, numtest, expected_numtrain):
    '''Raises a ValueError reporting that the training dataset is too small.

    Parameters
    ----------
    numtrain
        Number of training samples; formatted into the message.
    numval
        Number of validation samples; formatted into the message.
    numtest
        Number of test samples; formatted into the message.
    expected_numtrain
        Minimum number of training samples expected; formatted into the message.

    Raises
    ------
    ValueError
        Always; the message lists all four values.
    '''
    template = ('Not enough training data:'
                '\nNumber train samples: {} (Minimum expected: {})'
                '\nNumber val samples: {}'
                '\nNumber test samples: {}'
                '\n\nPlease lower `perc_train` or collect more audio data.')
    raise ValueError(template.format(numtrain, expected_numtrain, numval, numtest))
12 |
def numfeatures_incompatible_templatemodel():
    '''Raises a ValueError stating that the number of features does not fit
    the template model.

    Raises
    ------
    ValueError
        Always.
    '''
    message = ('ERROR: Number of features is incompatible with the template model. '
               'Try a higher number or rely on the defaults. Apologies for this inconvenience.')
    raise ValueError(message)
16 |
--------------------------------------------------------------------------------
/soundpy/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataprep import Generator, GeneratorFeatExtraction, make_gen_callable
2 | from .template_models import cnn_classifier, autoencoder_denoise, resnet50_classifier, \
3 | cnnlstm_classifier
4 | from .modelsetup import setup_callbacks, setup_layers
5 | from . import plot
6 | from . import builtin
7 | from .builtin import denoiser_train, envclassifier_train, denoiser_run, cnnlstm_train, \
8 | resnet50_train, envclassifier_extract_train, cnnlstm_extract_train, envclassifier_run
9 |
10 | __all__ = ['Generator', 'GeneratorFeatExtraction',
11 | 'cnn_classifier', 'autoencoder_denoise', 'resnet50_classifier',
12 | 'setup_callbacks', 'plot', 'cnnlstm_classifier', 'builtin', 'denoiser_train',
13 | 'envclassifier_train', 'denoiser_run', 'cnnlstm_train', 'resnet50_train',
14 | 'envclassifier_extract_train','make_gen_callable', 'setup_layers',
15 | 'cnnlstm_extract_train', 'envclassifier_run']
16 |
--------------------------------------------------------------------------------
/soundpy/models/plot.py:
--------------------------------------------------------------------------------
1 | import tensorflow
2 | from tensorflow.keras.models import Model
3 | from tensorflow.keras.models import load_model
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 |
7 | import os, sys
8 | import inspect
9 | currentdir = os.path.dirname(os.path.abspath(
10 | inspect.getfile(inspect.currentframe())))
11 | packagedir = os.path.dirname(currentdir)
12 | sys.path.insert(0, packagedir)
13 | import soundpy as pyst
14 |
15 |
def featuremaps(features, model, image_dir='./feature_maps/'):
    '''Saves the feature maps of each convolutional layer as .png file.

    Every layer of `model` whose name contains 'conv' is treated as a
    convolutional layer. For each of them a sub-model ending at that layer
    is built, `features` is run through it, and each channel of the output
    for the first sample is saved as a grayscale image at
    `<image_dir>/layer_<layer index>/featmap_<channel index>.png`.

    Parameters
    ----------
    features
        Input handed to the `predict` method of the sub-models. Only the
        output for the first sample (index 0) is plotted.
    model
        Model exposing `layers` and `inputs`; the output of each 'conv'
        layer is indexed as a 4-dimensional array.
    image_dir : str or pathlib.Path
        Directory under which the images are stored; created if missing.
        (default './feature_maps/')

    Returns
    -------
    None

    References
    ----------
    Brownlee, Jason (2019, May, 6). How to Visualize Filters and Feature
    Maps in Convolutional Neural Networks. Machine Learning Mastery.
    https://machinelearningmastery.com/how-to-visualize-filters-and-feature-maps-in-convolutional-neural-networks/
    '''
    conv_idx = [i for i, layer in enumerate(model.layers)
                if 'conv' in layer.name]
    for idx in conv_idx:
        model_featmaps = Model(inputs = model.inputs,
                               outputs = model.layers[idx].output)
        # named `layer_maps` so the function's own name is not shadowed
        layer_maps = model_featmaps.predict(features)
        # soundpy is imported as `pyst` in this module (`sp` is undefined here);
        # the directories only need to be checked once per layer.
        image_dir = pyst.utils.check_dir(image_dir, make=True)
        layer_dir = pyst.utils.check_dir(
            image_dir.joinpath('layer_{}'.format(idx)), make=True)
        for i in range(layer_maps.shape[-1]):
            plt.clf()
            plt.imshow(layer_maps[0,:,:,i], cmap='gray')
            plt.savefig(layer_dir.joinpath('featmap_{}.png'.format(i)))
--------------------------------------------------------------------------------
/soundpy/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a-n-rose/Python-Sound-Tool/4cb9ab7b55da9808da8dec3bc33759a7615ad4ed/soundpy/utils.pyc
--------------------------------------------------------------------------------
/start_jup_env.sh:
--------------------------------------------------------------------------------
1 | docker run -it --rm \
2 | --gpus all \
3 | --privileged=true \
4 | -v "$PWD":"/root/soundpy/" \
5 | -p 8888:8888 aju
6 | #-v "/audiodir/data":"/root/soundpy/data" \
7 |
--------------------------------------------------------------------------------
/tests/inspect_functions.py:
--------------------------------------------------------------------------------
1 | '''
2 | From NLTK decorators: https://github.com/nltk/nltk/blob/develop/nltk/decorators.py
3 |
4 | """
5 | Decorator module by Michele Simionato
6 | Copyright Michele Simionato, distributed under the terms of the BSD License (see below).
7 | http://www.phyast.pitt.edu/~micheles/python/documentation.html
8 | Included in NLTK for its support of a nice memoization decorator.
9 | """
10 | '''
11 |
12 |
13 | import inspect
14 |
def __legacysignature(signature):
    """
    For retrocompatibility reasons, we don't use a standard Signature.
    Instead, we use the string generated by this method.
    Basically, from a Signature we create a string and remove the default values.

    The defaults are removed on the Signature object itself rather than by
    splitting its string form on commas, so defaults that contain commas or
    '=' (e.g. ``x=(1, 2)``) and return annotations no longer corrupt the
    result. Parameter annotations, ``*``/``**`` prefixes and the ``/`` and
    ``*`` markers are kept as ``str(signature)`` renders them.

    :param signature: an ``inspect.Signature``
    :return: the parameter list as a string, without surrounding parentheses
    """
    bare_params = [
        param.replace(default=inspect.Parameter.empty)
        for param in signature.parameters.values()
    ]
    bare_signature = signature.replace(
        parameters=bare_params,
        return_annotation=inspect.Signature.empty,
    )
    # str() yields "(a, b, *args)"; drop the enclosing parentheses.
    return str(bare_signature)[1:-1]

def getinfo(func):
    """
    Returns an info dictionary containing:
    - name (the name of the function : str)
    - argnames (the names of the arguments : list); keyword-only
      argument names are not included
    - defaults (the values of the default arguments : tuple)
    - signature (the signature : str)
    - fullsignature (the full signature : Signature)
    - doc (the docstring : str)
    - module (the module name : str)
    - dict (the function __dict__ : str)
    - globals (the function's global namespace)
    - closure (the function's closure cells, or None)
    >>> def f(self, x=1, y=2, *args, **kw): pass
    >>> info = getinfo(f)
    >>> info["name"]
    'f'
    >>> info["argnames"]
    ['self', 'x', 'y', 'args', 'kw']
    >>> info["defaults"]
    (1, 2)
    >>> info["signature"]
    'self, x, y, *args, **kw'
    >>> info["fullsignature"]
    <Signature (self, x=1, y=2, *args, **kw)>
    """
    assert inspect.ismethod(func) or inspect.isfunction(func)
    argspec = inspect.getfullargspec(func)
    # first three fields: regular args, *args name, **kwargs name
    regargs, varargs, varkwargs = argspec[:3]
    argnames = list(regargs)
    if varargs:
        argnames.append(varargs)
    if varkwargs:
        argnames.append(varkwargs)
    fullsignature = inspect.signature(func)
    # Convert Signature to str
    signature = __legacysignature(fullsignature)

    # pypy compatibility
    if hasattr(func, "__closure__"):
        _closure = func.__closure__
        _globals = func.__globals__
    else:
        _closure = func.func_closure
        _globals = func.func_globals

    return dict(
        name=func.__name__,
        argnames=argnames,
        signature=signature,
        fullsignature=fullsignature,
        defaults=func.__defaults__,
        doc=func.__doc__,
        module=func.__module__,
        dict=func.__dict__,
        globals=_globals,
        closure=_closure,
    )
86 |
87 | ########################## LEGALESE ###############################
88 |
89 | ## Redistributions of source code must retain the above copyright
90 | ## notice, this list of conditions and the following disclaimer.
91 | ## Redistributions in bytecode form must reproduce the above copyright
92 | ## notice, this list of conditions and the following disclaimer in
93 | ## the documentation and/or other materials provided with the
94 | ## distribution.
95 |
96 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
97 | ## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
98 | ## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
99 | ## A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
100 | ## HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
101 | ## INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
102 | ## BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
103 | ## OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
104 | ## ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
105 | ## TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
106 | ## USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
107 | ## DAMAGE.
108 |
--------------------------------------------------------------------------------
/tests/utils_test.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import inspect
# Work out the directory containing this test module, take its parent
# directory, and put that parent at the front of ``sys.path``.
# NOTE(review): presumably done so that ``import soundpy`` below resolves to
# the in-repository package rather than an installed copy -- confirm.
currentdir = os.path.dirname(os.path.abspath(
    inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
7 |
8 | import numpy as np
9 | import pytest
10 | import librosa
11 | import pathlib
12 | import soundpy as sp
13 |
# Relative directory holding the audio files used by the tests below, and the
# two-channel file most of them load. Both paths are relative, so they are
# resolved against the current working directory at run time.
audio_dir = 'test_audio/'
test_audiofile = audio_dir + 'audio2channels.wav'
16 |
17 |
18 |
def test_path_or_samples_str():
    """A filename given as a plain string is expected to be reported as 'path'."""
    assert sp.utils.path_or_samples(test_audiofile) == 'path'
22 |
def test_path_or_samples_pathlib():
    """A filename given as a ``pathlib.Path`` is expected to be reported as 'path'."""
    audio_path = pathlib.Path(test_audiofile)
    detected = sp.utils.path_or_samples(audio_path)
    assert detected == 'path'
26 |
def test_path_or_samples_tuple_librosa():
    """The value returned by ``librosa.load`` is expected to be reported as 'samples'."""
    loaded = librosa.load(test_audiofile)
    assert sp.utils.path_or_samples(loaded) == 'samples'
31 |
def test_path_or_samples_tuple_not_real_samples():
    """An ``(ndarray, int)`` tuple that is not real audio is still expected to be 'samples'."""
    # ``np.array`` builds an array holding the values 1, 2, 3.  The low-level
    # ``np.ndarray([1, 2, 3])`` constructor would instead treat the list as a
    # shape and allocate an uninitialised (1, 2, 3) array.
    fake_audio = (np.array([1, 2, 3]), 4)
    item_type = sp.utils.path_or_samples(fake_audio)
    assert item_type == 'samples'
36 |
def test_path_or_samples_str_not_real_path():
    """Passing the string 'blah' is expected to raise ``ValueError``."""
    hint = 'IF TEST FAILES: For now, function does not test for path validity.'
    print(hint)
    with pytest.raises(ValueError):
        sp.utils.path_or_samples('blah')
41 |
def test_path_or_samples_pathlib_not_real_path():
    """Passing ``pathlib.Path('blah')`` is expected to raise ``ValueError``."""
    hint = 'IF TEST FAILES: For now, function does not test for path validity.'
    print(hint)
    bogus_path = pathlib.Path('blah')
    with pytest.raises(ValueError):
        sp.utils.path_or_samples(bogus_path)
46 |
def test_match_dtype_float2int():
    """``match_dtype`` is expected to give a float array the dtype of an int reference.

    The adjusted array must keep its length and compare equal to the input.
    """
    reference = np.array([1, 2, 3, 4])
    floats = np.array([1., 2., 3., 4., 5.])
    converted = sp.utils.match_dtype(floats, reference)
    assert converted.dtype == reference.dtype
    assert len(converted) == len(floats)
    assert np.array_equal(floats, converted)
    # Sanity check: the two inputs really do have different dtypes.
    assert reference.dtype != floats.dtype
55 |
def test_match_dtype_int2float():
    """``match_dtype`` is expected to give an int array the dtype of a float reference.

    The adjusted array must keep its length and compare equal to the input.
    """
    reference = np.array([1., 2., 3., 4.])
    integers = np.array([1, 2, 3, 4, 5])
    converted = sp.utils.match_dtype(integers, reference)
    assert converted.dtype == reference.dtype
    assert len(converted) == len(integers)
    assert np.array_equal(integers, converted)
    # Sanity check: the two inputs really do have different dtypes.
    assert reference.dtype != integers.dtype
64 |
def test_shape_samps_channels_too_many_dimensions():
    """A 3-dimensional input to ``shape_samps_channels`` is expected to raise ``ValueError``."""
    # Integers 1..12 arranged as a (2, 3, 2) array.
    three_dim = np.arange(1, 13).reshape(2, 3, 2)
    with pytest.raises(ValueError):
        sp.dsp.shape_samps_channels(three_dim)
69 |
def test_check_dir_default_create():
    """``check_dir`` with default arguments is expected to create the directory.

    The returned value must be a ``pathlib`` path that exists on disk.
    """
    checked_dir = sp.utils.check_dir('./testtesttest/')
    try:
        # ``pathlib.Path`` is the common base of ``PosixPath`` and
        # ``WindowsPath``, so this check is not tied to one platform.
        assert isinstance(checked_dir, pathlib.Path)
        assert os.path.exists(checked_dir)
    finally:
        # Remove the directory even when an assertion fails; other tests in
        # this module reuse the same directory name and expect it to be absent.
        os.rmdir(checked_dir)
76 |
def test_check_dir_check_exists():
    """``check_dir(make=False)`` is expected to accept a directory that already exists.

    The directory is first created with ``make=True`` and then checked again
    with ``make=False``; the second call must return an existing ``pathlib``
    path.
    """
    created_dir = sp.utils.check_dir('./testtesttest/', make=True)
    try:
        checked_dir = sp.utils.check_dir(created_dir, make=False)
        # ``pathlib.Path`` is the common base of ``PosixPath`` and
        # ``WindowsPath``, so this check is not tied to one platform.
        assert isinstance(checked_dir, pathlib.Path)
        assert os.path.exists(checked_dir)
    finally:
        # Remove the directory even when the second call or an assertion
        # fails, so it cannot leak into the other ``check_dir`` tests.
        os.rmdir(created_dir)
84 |
def test_check_dir_check_exists_raiseerror():
    """``check_dir(make=False)`` on './testtesttest/' is expected to raise ``FileNotFoundError``."""
    missing_dir = './testtesttest/'
    with pytest.raises(FileNotFoundError):
        sp.utils.check_dir(missing_dir, make=False)
89 |
def test_check_dir_check_exists_notwriteinto_raiseerror():
    """``check_dir(append=False)`` on an existing directory is expected to raise ``FileExistsError``."""
    created_dir = sp.utils.check_dir('./testtesttest/', make=True)
    try:
        with pytest.raises(FileExistsError):
            sp.utils.check_dir(created_dir, make=False, append=False)
    finally:
        # Remove the directory even when the expected exception is not
        # raised, so it cannot leak into the other ``check_dir`` tests.
        os.rmdir(created_dir)
96 |
def test_check_dir_pathwithextension_raiseerror():
    """A directory name carrying a file extension is expected to raise ``TypeError``."""
    dir_with_extension = './testtesttest.py/'
    with pytest.raises(TypeError):
        sp.utils.check_dir(dir_with_extension, make=False)
101 |
def test_string2list():
    """``restore_dictvalue`` is expected to rebuild a collected file list from its string form."""
    collected = sp.files.collect_audiofiles(
        audio_dir, wav_only=False, recursive=False)
    restored = sp.utils.restore_dictvalue(str(collected))
    assert collected == restored
108 |
def test_string2list_loaddict():
    """A file list saved with ``save_dict`` is expected to survive a round trip.

    A one-entry dict holding the collected audio files is saved to a CSV file
    and loaded back; the loaded value is passed through ``restore_dictvalue``
    and compared with the original list.
    """
    audiofiles = sp.files.collect_audiofiles(audio_dir, wav_only=False,
                                             recursive=False)
    test_dict_path = 'testest.csv'
    # Start from a clean slate in case an earlier run left the file behind.
    if os.path.exists(test_dict_path):
        os.remove(test_dict_path)
    try:
        d_path = sp.utils.save_dict(
            dict2save={0: audiofiles},
            filename=test_dict_path)
        d_loaded = sp.utils.load_dict(d_path)
        loaded_keys = list(d_loaded)
        # Fail with a clear message instead of a NameError when nothing was
        # loaded.
        assert loaded_keys, 'load_dict returned no entries'
        # Use the last loaded key, as the previous loop over the keys did.
        audiofiles_string = d_loaded[loaded_keys[-1]]
        audiofiles_checked = sp.utils.restore_dictvalue(audiofiles_string)
        assert audiofiles == audiofiles_checked
    finally:
        # Remove the CSV even when an assertion above fails.
        if os.path.exists(test_dict_path):
            os.remove(test_dict_path)
126 |
def test_restore_dictvalue_list_of_tuples():
    """Placeholder: this test has no assertions yet and always passes."""
129 |
def test_restore_dictvalue_regular_string():
    """The plain string 'hann' is expected to come back unchanged."""
    plain_text = 'hann'
    restored = sp.utils.restore_dictvalue(plain_text)
    assert restored == plain_text
134 |
def test_restore_dictvalue_None():
    """The string 'None' is expected to be restored to the ``None`` singleton."""
    string_val = str(None)
    got = sp.utils.restore_dictvalue(string_val)
    # Identity check: comparisons against the ``None`` singleton use ``is``.
    assert got is None
140 |
def test_restore_dictvalue_True():
    """The string 'True' is expected to be restored to the boolean ``True``."""
    string_val = str(True)
    got = sp.utils.restore_dictvalue(string_val)
    # Identity check: ``True == got`` would also pass for the integer 1.
    assert got is True
146 |
def test_restore_dictvalue_False():
    """The string 'False' is expected to be restored to the boolean ``False``."""
    string_val = str(False)
    got = sp.utils.restore_dictvalue(string_val)
    # Identity check: ``False == got`` would also pass for the integer 0.
    assert got is False
152 |
def test_restore_dictvalue_int():
    """The string form of the integer 1 is expected to be restored to a value equal to 1."""
    number = 1
    restored = sp.utils.restore_dictvalue(str(number))
    assert number == restored
158 |
def test_restore_dictvalue_float():
    """The string form of the float 1.0 is expected to be restored to a value equal to 1.0."""
    number = 1.0
    restored = sp.utils.restore_dictvalue(str(number))
    assert number == restored
164 |
def test_restore_dictvalue_tuple():
    """The string form of the tuple (3, 4) is expected to be restored to an equal tuple."""
    pair = (3, 4)
    restored = sp.utils.restore_dictvalue(str(pair))
    assert pair == restored
170 |
def test_restore_dictvalue_list_of_pathwaystrings():
    """A stringified list of filename strings is expected to be restored to an equal list."""
    filenames = ['audio1.wav', 'audio2.wav', 'audio3.wav']
    restored = sp.utils.restore_dictvalue(str(filenames))
    assert filenames == restored
176 |
def test_restore_dictvalue_list_of_pathlib_ojbect_strings():
    """A stringified list of ``pathlib.Path`` objects is expected to be restored to an equal list."""
    paths = [pathlib.Path(name)
             for name in ('audio1.wav', 'audio2.wav', 'audio3.wav')]
    restored = sp.utils.restore_dictvalue(str(paths))
    assert paths == restored
182 |
def test_restore_dictvalue_list_of_pathwaystrings_nested():
    """A stringified nested list of filename strings is expected to be restored to an equal list."""
    nested_filenames = [['audio1.wav', 'audio2.wav'], ['audio3.wav']]
    restored = sp.utils.restore_dictvalue(str(nested_filenames))
    assert nested_filenames == restored
188 |
def test_restore_dictvalue_list_of_pathlib_ojbect_strings_nested():
    """A stringified nested list of ``pathlib.Path`` objects is expected to raise ``ValueError``."""
    nested_paths = [
        [pathlib.Path('audio1.wav'), pathlib.Path('audio2.wav')],
        [pathlib.Path('audio3.wav')],
    ]
    as_text = str(nested_paths)
    with pytest.raises(ValueError):
        sp.utils.restore_dictvalue(as_text)
194 |
def test_restore_dictvalue_tuple_labeledpaths():
    """A stringified list of ``(label, filename)`` tuples is expected to be restored to an equal list."""
    filenames = ('audio1.wav', 'audio2.wav', 'audio3.wav')
    # Labels 1, 2, 3 paired with the filenames in order.
    labeled = list(enumerate(filenames, start=1))
    restored = sp.utils.restore_dictvalue(str(labeled))
    assert labeled == restored
200 |
def test_restore_dictvalue_tuple_labeled_pathlibojbects():
    """A stringified list of ``(label, pathlib.Path)`` tuples is expected to be restored to an equal list."""
    filenames = ('audio1.wav', 'audio2.wav', 'audio3.wav')
    # Labels 1, 2, 3 paired with a ``pathlib.Path`` for each filename.
    labeled = [(label, pathlib.Path(name))
               for label, name in enumerate(filenames, start=1)]
    restored = sp.utils.restore_dictvalue(str(labeled))
    assert labeled == restored
206 |
--------------------------------------------------------------------------------
/tests_requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 |
--------------------------------------------------------------------------------