├── requirements.txt ├── examples ├── run_tests ├── config.py ├── concatenator ├── src ├── tests │ ├── run_f0_tests.sh │ ├── run_tests.sh │ └── config.py └── sppysound │ ├── synthesis │ ├── __init__.py │ ├── wavegen.py │ ├── timestretch.py │ └── synthesis_tools.py │ ├── Examples │ ├── ExampleTarget │ │ └── ElectricGuitarSample-out.wav │ ├── ExampleFiles │ │ ├── Xylophone.rosewood.roll.ff.B4.stereo.aif │ │ ├── Xylophone.rosewood.roll.ff.B5.stereo.aif │ │ ├── Xylophone.rosewood.roll.ff.B6.stereo.aif │ │ ├── Xylophone.rosewood.roll.ff.B7.stereo.aif │ │ ├── Xylophone.rosewood.roll.ff.Ab6.stereo.aif │ │ ├── Xylophone.rosewood.roll.ff.Ab7.stereo.aif │ │ ├── Xylophone.rosewood.roll.ff.Bb4.stereo.aif │ │ └── Xylophone.rosewood.roll.ff.Bb5.stereo.aif │ ├── README.txt │ ├── analysis_config.py │ ├── matching_config.py │ ├── synthesis_config.py │ ├── Database analysis example.ipynb │ ├── Database Matching Example.ipynb │ └── Match Synthesis Example.ipynb │ ├── analysis │ ├── audiograph.py │ ├── __init__.py │ ├── AnalysisTools.py │ ├── ZeroXAnalysis.py │ ├── PeakAnalysis.py │ ├── SpectralCentroidAnalysis.py │ ├── F0HarmRatioAnalysis.py │ ├── CentroidAnalysis.py │ ├── VarianceAnalysis.py │ ├── SpectralCrestFactorAnalysis.py │ ├── SpectralFlatnessAnalysis.py │ ├── SpectralFluxAnalysis.py │ ├── SkewnessAnalysis.py │ ├── KurtosisAnalysis.py │ ├── RMSAnalysis.py │ ├── SpectralSpreadAnalysis.py │ ├── Analysis.py │ ├── AttackAnalysis.py │ └── FFTAnalysis.py │ ├── __init__.py │ ├── commands.txt │ ├── full_run_MBair.sh │ ├── full_run_MBpro.sh │ ├── docs │ ├── examples.rst │ ├── refs.bib │ ├── index.rst │ ├── api.rst │ ├── installation.rst │ ├── DatabaseAnalysisExample.ipynb │ ├── analysis_config.py │ ├── matching_config.py │ ├── synthesis_config.py │ ├── DatabaseMatchingExample.ipynb │ ├── MatchSynthesisExample.ipynb │ ├── overview.rst │ └── Makefile │ ├── pitch_shift.py │ ├── helper.py │ ├── synthesize_output.py │ ├── create_database.py │ ├── run_matching.py │ ├── config.py │ └── 
multirate.py ├── .gitignore ├── install.sh ├── setup.py └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples: -------------------------------------------------------------------------------- 1 | ./src/sppysound/Examples -------------------------------------------------------------------------------- /run_tests: -------------------------------------------------------------------------------- 1 | ./src/tests/run_tests.sh -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | ./src/sppysound/config.py -------------------------------------------------------------------------------- /concatenator: -------------------------------------------------------------------------------- 1 | ./src/sppysound/concatenator.py -------------------------------------------------------------------------------- /src/tests/run_f0_tests.sh: -------------------------------------------------------------------------------- 1 | python audiofile_tests.py -v F0AnalysisTests.test_Generatef0 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.wav 3 | *.aiff 4 | *.aif 5 | Session.vim 6 | pip-selfcheck.json 7 | .DS_Store 8 | -------------------------------------------------------------------------------- /src/sppysound/synthesis/__init__.py: -------------------------------------------------------------------------------- 1 | import timestretch 2 | import wavegen 3 | __all__ = ["timestretch", "wavegen"] 4 | -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleTarget/ElectricGuitarSample-out.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleTarget/ElectricGuitarSample-out.wav -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B4.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B4.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B5.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B5.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B6.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B6.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B7.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B7.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Ab6.stereo.aif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Ab6.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Ab7.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Ab7.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Bb4.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Bb4.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Bb5.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Bb5.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/README.txt: -------------------------------------------------------------------------------- 1 | This folder contains three examples of the pysound API usage. They can be run 2 | using the Jupyter notebook unix command. See the documentation for instructions 3 | on running these scripts. 
def plot_audio(audio_array):
    """
    Plots audio to a graph
    """
    # Draw the waveform first, then attach the axis labels before showing.
    plt.plot(audio_array)
    for labeller, text in ((plt.xlabel, "Time (samples)"),
                           (plt.ylabel, "sample value")):
        labeller(text)
    plt.show()
./create_database.py ~/AudioDatabases/Vocal_examples ~/AnalysedAudioDatabases/Vocal_examples --reanalyse 2 | ./create_database.py ~/AudioDatabases/Viola ~/AnalysedAudioDatabases/Viola3 --reanalyse 3 | ./run_matching.py ~/AnalysedAudioDatabases/Viola3 ~/AnalysedAudioDatabases/Vocal_examples ~/OutputDatabases/TestOutput --rematch 4 | ./synthesize_output.py ~/AnalysedAudioDatabases/Viola3 ~/OutputDatabases/TestOutput ~/AnalysedAudioDatabases/Vocal_examples 5 | -------------------------------------------------------------------------------- /src/sppysound/full_run_MBair.sh: -------------------------------------------------------------------------------- 1 | set -euo pipefail 2 | ./create_database.py ~/AudioDatabases/Vocal_examples ~/AnalysedAudioDatabases/Vocal_examples --reanalyse 3 | ./create_database.py ~/AudioDatabases/Viola ~/AnalysedAudioDatabases/Viola3 --reanalyse 4 | ./run_matching.py ~/AnalysedAudioDatabases/Viola3 ~/AnalysedAudioDatabases/Vocal_examples ~/OutputDatabases/TestOutput --rematch 5 | ./synthesize_output.py ~/AnalysedAudioDatabases/Viola3 ~/OutputDatabases/TestOutput ~/AnalysedAudioDatabases/Vocal_examples 6 | -------------------------------------------------------------------------------- /src/tests/run_tests.sh: -------------------------------------------------------------------------------- 1 | SOURCE="${BASH_SOURCE[0]}" 2 | while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink 3 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 4 | SOURCE="$(readlink "$SOURCE")" 5 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located 6 | done 7 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 8 | cd "$DIR" 9 | python -m unittest -v audiofile_tests 10 | -------------------------------------------------------------------------------- /src/sppysound/full_run_MBpro.sh: 
-------------------------------------------------------------------------------- 1 | set -euo pipefail 2 | ./create_database.py /Volumes/Storage/AudioDatabases/Viola ~/AllDatabases/AnalysedAudioDatabases/Viola3 --reanalyse 3 | ./create_database.py /Volumes/Storage/AudioDatabases/Vocal_examples ~/AllDatabases/AnalysedAudioDatabases/Vocal_examples --reanalyse 4 | ./run_matching.py ~/AllDatabases/AnalysedAudioDatabases/Viola3 ~/AllDatabases/AnalysedAudioDatabases/Vocal_examples ~/AllDatabases/OutputDatabases/TestOutput --rematch 5 | ./synthesize_output.py ~/AllDatabases/AnalysedAudioDatabases/Viola3 ~/AllDatabases/OutputDatabases/TestOutput ~/AllDatabases/AnalysedAudioDatabases/Vocal_examples 6 | -------------------------------------------------------------------------------- /src/sppysound/Examples/matching_config.py: -------------------------------------------------------------------------------- 1 | rms = { 2 | "window_size": 100, 3 | "overlap": 2, 4 | } 5 | 6 | analysis_dict = { 7 | "f0": "log2_median", 8 | "rms": "mean" 9 | } 10 | 11 | matcher_weightings = { 12 | "f0" : 1., 13 | "rms": 1. 14 | } 15 | 16 | analysis = { 17 | "reanalyse": False 18 | } 19 | 20 | matcher = { 21 | "rematch": False, 22 | "grain_size": 100, 23 | "overlap": 2, 24 | # Defines the number of matches to keep for synthesis. 
25 | "match_quantity": 20 26 | } 27 | 28 | output_file = { 29 | "samplerate": 44100, 30 | "format": 131075, 31 | "channels": 1 32 | } 33 | -------------------------------------------------------------------------------- /src/sppysound/Examples/synthesis_config.py: -------------------------------------------------------------------------------- 1 | rms = { 2 | "window_size": 100, 3 | "overlap": 2, 4 | } 5 | 6 | analysis_dict = { 7 | "f0": "log2_median", 8 | "rms": "mean" 9 | } 10 | 11 | analysis = { 12 | "reanalyse": False 13 | } 14 | 15 | output_file = { 16 | "samplerate": 44100, 17 | "format": 131075, 18 | "channels": 1 19 | } 20 | 21 | synthesizer = { 22 | "enforce_rms": True, 23 | "enf_rms_ratio_limit": 5., 24 | "enforce_f0": True, 25 | "enf_f0_ratio_limit": 10., 26 | "grain_size": 100, 27 | "overlap": 2, 28 | "normalize" : True, 29 | # Defines the number of potential grains to choose from matches when 30 | # synthesizing output. 31 | "match_quantity": 20 32 | } 33 | -------------------------------------------------------------------------------- /src/sppysound/docs/examples.rst: -------------------------------------------------------------------------------- 1 | API Usage Examples 2 | ================== 3 | 4 | This section aims to give use case examples for the API. This shows how the 5 | code can be used to create a python script similar to that of the concatenator 6 | script. 7 | 8 | WARNING: As examples use the same files, only one notebook can be run at a 9 | time. Make sure one notebook has been closed and halted before starting the 10 | next. 11 | 12 | This section contains three examples of how to use the API for creating and 13 | comparing audio databases. Examples have been created using Jupyter interactive 14 | notebooks that can be found in the project folder and used to step through the 15 | code line by line interactively to see results. 16 | 17 | .. 
import os
from setuptools import setup, find_packages


def read(fname):
    """
    Utility function to read the README file.

    Used for the long_description. It's nice, because now 1) we have a top level
    README file and 2) it's easier to type in the README file than to put a raw
    string in below ...
    """
    # Fix: the original left the file handle open; a context manager
    # guarantees the file is closed even if .read() raises.
    with open(os.path.join(os.path.dirname(__file__), fname)) as f:
        return f.read()


setup(
    name="sppysound",
    version="1.0",
    author="Sam Perry",
    author_email="u1265119@unimail.hud.ac.uk",
    description=("A library for audio analysis and synthesis."),
    license="GPL",
    keywords="synthesis audio",
    url="https://github.com/Pezz89/pysound",
    package_dir={'': 'src'},
    packages=find_packages(where='src'),
    setup_requires=["numpy"],  # Just numpy here
    # Fix: install_requires expects an iterable of requirement strings, not
    # one newline-joined blob; splitlines() is backward compatible (the
    # project's requirements.txt is currently empty, yielding []).
    install_requires=read('requirements.txt').splitlines()
)
* np.log2(pitch) 25 | p_shift_args = ["sox", input_filepath, output_filepath, "pitch", str(cents)] 26 | 27 | p = subprocess.Popen(p_shift_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 28 | (output, err) = p.communicate() 29 | 30 | with AudioFile(output_filepath, mode='r') as shift_output: 31 | # Read result 32 | result = shift_output.read_grain() 33 | return result 34 | -------------------------------------------------------------------------------- /src/sppysound/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | from Analysis import Analysis 2 | from RMSAnalysis import RMSAnalysis 3 | from ZeroXAnalysis import ZeroXAnalysis 4 | from FFTAnalysis import FFTAnalysis 5 | from SpectralCentroidAnalysis import SpectralCentroidAnalysis 6 | from SpectralSpreadAnalysis import SpectralSpreadAnalysis 7 | from SpectralFluxAnalysis import SpectralFluxAnalysis 8 | from SpectralCrestFactorAnalysis import SpectralCrestFactorAnalysis 9 | from SpectralFlatnessAnalysis import SpectralFlatnessAnalysis 10 | from PeakAnalysis import PeakAnalysis 11 | from F0Analysis import F0Analysis 12 | from CentroidAnalysis import CentroidAnalysis 13 | from VarianceAnalysis import VarianceAnalysis 14 | from KurtosisAnalysis import KurtosisAnalysis 15 | from SkewnessAnalysis import SkewnessAnalysis 16 | from F0HarmRatioAnalysis import F0HarmRatioAnalysis 17 | import AnalysisTools 18 | __all__ = [ 19 | "Analysis", 20 | "ZeroXAnalysis", 21 | "RMSAnalysis", 22 | "AnalysisTools", 23 | "FFTAnalysis", 24 | "SpectralCentroidAnalysis", 25 | "SpectralSpreadAnalysis", 26 | "SpectralFluxAnalysis", 27 | "SpectralCrestFactorAnalysis", 28 | "SpectralFlatnessAnalysis", 29 | "F0Analysis", 30 | "PeakAnalysis", 31 | "CentroidAnalysis", 32 | "VarianceAnalysis", 33 | "KurtosisAnalysis", 34 | "SkewnessAnalysis", 35 | "F0HarmRatioAnalysis" 36 | ] 37 | -------------------------------------------------------------------------------- 
def gen_wave(
    size,
    freq,
    wave_type,
    phase = 0.0,
    amplitude = 1.0,
    samplerate = 44100
):
    """
    Generates a numpy array of given size (seconds) containing a wave of given
    type and frequency at the samplerate specified.

    Args:
        size: Duration of the output in seconds.
        freq: Frequency of the wave in Hz.
        wave_type: One of "sine", "square", "tri", "saw" or "rev_saw".
        phase: Initial phase offset in radians. Fix: this parameter was
            previously accepted but silently ignored; it now offsets the
            sine (and, via hard clipping, the square) wave. The default of
            0.0 preserves the behaviour existing callers see.
        amplitude: Peak amplitude of the generated wave.
        samplerate: Number of samples generated per second.

    Raises:
        KeyError: If wave_type is not one of the supported names.

    Note: Waves generated are raw and not anti-aliased. For audio signals
    consider using other algorithms.
    """

    def sine():
        samples = np.arange(0, size, 1. / samplerate)
        # Apply the requested phase offset (previously ignored).
        return amplitude * np.sin(2.0*np.pi*freq*samples + phase)

    def square():
        # Hard-clipped sine; inherits the phase offset through sine().
        return amplitude * np.sign(sine())

    def triangle():
        samples = np.arange(0, size, 1. / samplerate)
        return amplitude - (2 * np.abs(samples * (2 * freq) % (2*amplitude) - amplitude))

    def sawtooth():
        samples = np.arange(0, size, 1. / samplerate)
        return amplitude - (2 * np.abs((samples * freq) % amplitude - amplitude))

    def reverse_saw():
        samples = np.arange(0, size, 1. / samplerate)
        return amplitude - (2 * np.abs(((samples * freq) % amplitude)))

    # Dispatch table mapping wave names to their generators.
    options = {
        "sine" : sine,
        "square" : square,
        "tri" : triangle,
        "saw" : sawtooth,
        "rev_saw" : reverse_saw
    }

    return options[wave_type]()
class ButterFilter:
    """Designs and applies a Butterworth filter via scipy.signal."""

    def __init__(self, *args, **kwargs):
        # No coefficients exist until design_butter() has been called.
        self.filtervalues = None
        self.logger = logging.getLogger(__name__ + '.ButterFilter')

    def design_butter(self, cutoff, fs, filtertype='high', order=5):
        """
        Generate a butterworth filter of type and order specified.

        Calculates the cutoff frequency based on the samplerate.
        """
        # Ref: This code has been adapted from:
        # http://stackoverflow.com/questions/25191620/creating-lowpass-filter-in-scipy-understanding-methods-and-units
        nyquist = 0.5 * fs
        # scipy.signal.butter wants the cutoff as a fraction of nyquist.
        numerator, denominator = butter(
            order, cutoff / nyquist, btype=filtertype, analog=False)
        self.filtervalues = numerator, denominator

    def filter_butter(self, data):
        """Filter audio using a butterworth filter."""
        numerator, denominator = self.filtervalues
        # Apply the coefficients produced by design_butter().
        return lfilter(numerator, denominator, data)
This 18 | form of synthesis combines the ability to window and join small segments of 19 | sound to create a new sound (a process known as granular synthesis), with audio 20 | analysis techniques capable of describing a sound in order to differentiate it 21 | from others. By analysing small segments in a target sound for their perceptual 22 | characteristics (such as pitch, timbre and loudness), it is then possible to 23 | compare these segments to a collection of source sounds to find perceptually 24 | similar segments. From this, the most perceptually similar matches can be taken 25 | and joined using granular synthesis techniques in order to achieve the final 26 | result. 27 | 28 | Full documentation can be found at: http://pezz89.github.io/PySoundConcat/index.html 29 | -------------------------------------------------------------------------------- /src/sppysound/docs/index.rst: -------------------------------------------------------------------------------- 1 | .. The Concatenator documentation master file, created by 2 | sphinx-quickstart on Tue Mar 15 18:41:42 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Concatenator's documentation! 7 | ======================================== 8 | 9 | A python script for generating granular synthesis driven representations of 10 | audio files based on audio database analysis. 11 | 12 | .. image:: Concatenator_diagram_scaled_small.jpg 13 | :align: center 14 | :scale: 100% 15 | 16 | This script was developed to explore the creative potential of combining 17 | short-time audio analyses with granular synthesis, to synthesize perceptually 18 | related representations of target audio files. Through use of analysed 19 | databases of varying sizes, an output can be generated that represents a mix of 20 | the spectral and temporal features of the original target sound and the corpus 21 | of source sounds. 
try:
    # Fix: the bare `collections.MutableSet` alias was deprecated in 3.3 and
    # removed in Python 3.10; collections.abc is the correct home.
    from collections.abc import MutableSet as _MutableSet
except ImportError:  # Python 2 fallback
    from collections import MutableSet as _MutableSet


class OrderedSet(_MutableSet):
    '''
    Defines a set object that remembers the order that items are added to it.

    Taken from: http://code.activestate.com/recipes/576694/

    Internally, a doubly linked list of [key, prev, next] cells threaded
    through a sentinel gives O(1) add/discard/pop, while self.map gives O(1)
    membership tests.
    '''

    def __init__(self, iterable=None):
        """Create an empty ordered set, optionally populated from iterable."""
        self.end = end = []
        end += [None, end, end]  # sentinel node for doubly linked list
        self.map = {}  # key --> [key, prev, next]
        if iterable is not None:
            self |= iterable

    def __len__(self):
        return len(self.map)

    def __contains__(self, key):
        return key in self.map

    def add(self, key):
        """Append key at the end of the set if not already present."""
        if key not in self.map:
            end = self.end
            curr = end[1]
            curr[2] = end[1] = self.map[key] = [key, curr, end]

    def discard(self, key):
        """Remove key if present, unlinking its cell from the list."""
        if key in self.map:
            key, prev, next = self.map.pop(key)
            prev[2] = next
            next[1] = prev

    def __iter__(self):
        # Walk the linked list forwards from the sentinel.
        end = self.end
        curr = end[2]
        while curr is not end:
            yield curr[0]
            curr = curr[2]

    def __reversed__(self):
        # Walk the linked list backwards from the sentinel.
        end = self.end
        curr = end[1]
        while curr is not end:
            yield curr[0]
            curr = curr[1]

    def pop(self, last=True):
        """Remove and return the most (or least) recently added element.

        Raises:
            KeyError: If the set is empty.
        """
        if not self:
            raise KeyError('set is empty')
        key = self.end[1][0] if last else self.end[2][0]
        self.discard(key)
        return key

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        # Order matters only when comparing against another OrderedSet.
        if isinstance(other, OrderedSet):
            return len(self) == len(other) and list(self) == list(other)
        return set(self) == set(other)
"overlap": 16 20 | } 21 | 22 | skewness = { 23 | "window_size": 130, 24 | "overlap": 16 25 | } 26 | 27 | fft = { 28 | "window_size": 65536 29 | } 30 | 31 | 32 | matcher_weightings = { 33 | "f0" : 1., 34 | "spccntr" : 1., 35 | "spcsprd" : 1., 36 | "spcflux" : 1., 37 | "spccf" : 1., 38 | "spcflatness": 1., 39 | "zerox" : 1., 40 | "rms" : 1., 41 | "peak": 1., 42 | "centroid": 1., 43 | "kurtosis": 1., 44 | "skewness": 1., 45 | "variance": 3., 46 | "harm_ratio": 1. 47 | } 48 | 49 | analysis_dict = { 50 | "f0": "log2_median", 51 | "rms": "mean", 52 | "zerox": "mean", 53 | "spccntr": "mean", 54 | "spcsprd": "mean", 55 | "spcflux": "mean", 56 | "spccf": "mean", 57 | "spcflatness": "mean", 58 | "peak": "mean", 59 | "centroid": "mean", 60 | "kurtosis": "mean", 61 | "skewness": "mean", 62 | "variance": "mean", 63 | "harm_ratio": "mean" 64 | } 65 | 66 | analysis = { 67 | "reanalyse": False 68 | } 69 | 70 | matcher = { 71 | "rematch": True, 72 | "grain_size": 130, 73 | "overlap": 16, 74 | # Defines the number of matches to keep for synthesis. Note that this must 75 | # also be specified in the synthesis config 76 | "match_quantity": 20 77 | } 78 | 79 | synthesizer = { 80 | "enforce_rms": True, 81 | "enf_rms_ratio_limit": 5., 82 | "enforce_f0": True, 83 | "enf_f0_ratio_limit": 10., 84 | "grain_size": 130, 85 | "overlap": 16, 86 | "normalize" : True, 87 | # Defines the number of potential grains to choose from matches when 88 | # synthesizing output. 
#!/usr/bin/env python

"""Command line interface for synthesizing output audio from matched databases."""

import argparse
import audiofile
import logging
from fileops import loggerops
import pdb
import os
import __builtin__
import config
from database import AudioDatabase, Synthesizer

filename = os.path.splitext(__file__)[0]
logger = loggerops.create_logger(log_filename='./{0}.log'.format(filename))

def main():
    """Parse arguments then synthesize output audio from database matches."""
    logger.info('Started')
    parser = argparse.ArgumentParser(
        description='Synthesize audio in the output database (argument 2) '
        'using grains from the source database (argument 1), guided by '
        'matches to the target database (argument 3).'
    )
    parser.add_argument(
        'source',
        type=str,
        help='source database directory'
    )
    parser.add_argument(
        'output',
        type=str,
        help='output database directory'
    )
    # NOTE: required positional; the original default=None was ignored by
    # argparse and has been removed.
    parser.add_argument(
        'target',
        type=str,
        help='target database directory'
    )
    args = parser.parse_args()

    # Load database of samples to be used for output synthesis.
    source_db = AudioDatabase(
        args.source,
        config=config,
        analysis_list={"f0", "rms", "peak"}
    )
    # Create/load a pre-existing database.
    source_db.load_database(reanalyse=False)

    # Load database used to generate matches to source database. This is
    # used when enforcing analyses such as RMS and F0. (Original grains are
    # needed to calculate the ratio to alter the synthesized grain by.)
    target_db = AudioDatabase(
        args.target,
        config=config,
        analysis_list={"f0", "rms", "peak"}
    )
    # Create/load a pre-existing database.
    target_db.load_database(reanalyse=False)

    # Database that receives the synthesized grains.
    output_db = AudioDatabase(
        args.output,
        config=config
    )
    # Create/load a pre-existing database.
    output_db.load_database(reanalyse=False)

    synthesizer = Synthesizer(source_db, output_db, target_db=target_db, config=config)
    synthesizer.synthesize(
        grain_size=config.synthesizer["grain_size"],
        overlap=config.synthesizer["overlap"]
    )

if __name__ == "__main__":
    main()
autoclass:: database.Synthesizer 36 | :members: 37 | 38 | --------------------------- 39 | Analysis Classes 40 | --------------------------- 41 | .. autoclass:: analysis.Analysis 42 | :members: 43 | Centroid Analysis Class 44 | ~~~~~~~~~~~~~~~~~~~~~~~ 45 | 46 | .. autoclass:: analysis.CentroidAnalysis 47 | :members: 48 | 49 | F0 Analysis Class 50 | ~~~~~~~~~~~~~~~~~~~~~~~ 51 | 52 | .. autoclass:: analysis.F0Analysis 53 | :members: 54 | 55 | FFT Analysis Class 56 | ~~~~~~~~~~~~~~~~~~~~~~~ 57 | 58 | .. autoclass:: analysis.FFTAnalysis 59 | :members: 60 | 61 | Harmonic Ratio Analysis Class 62 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 63 | 64 | .. autoclass:: analysis.F0HarmRatioAnalysis 65 | :members: 66 | 67 | Kurtosis Analysis Class 68 | ~~~~~~~~~~~~~~~~~~~~~~~ 69 | 70 | .. autoclass:: analysis.KurtosisAnalysis 71 | :members: 72 | 73 | Peak Analysis Class 74 | ~~~~~~~~~~~~~~~~~~~~~~~ 75 | 76 | .. autoclass:: analysis.PeakAnalysis 77 | :members: 78 | 79 | RMS Analysis Class 80 | ~~~~~~~~~~~~~~~~~~~~~~~ 81 | 82 | .. autoclass:: analysis.RMSAnalysis 83 | :members: 84 | 85 | Spectral Centroid Analysis Class 86 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 87 | 88 | .. autoclass:: analysis.SpectralCentroidAnalysis 89 | :members: 90 | 91 | Spectral Crest Factor Analysis Class 92 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 93 | 94 | .. autoclass:: analysis.SpectralCrestFactorAnalysis 95 | :members: 96 | 97 | Spectral Flatness Analysis Class 98 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 99 | 100 | .. autoclass:: analysis.SpectralFlatnessAnalysis 101 | :members: 102 | 103 | Spectral Flux Analysis Class 104 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 105 | 106 | .. autoclass:: analysis.SpectralFluxAnalysis 107 | :members: 108 | 109 | Spectral Spread Analysis Class 110 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 111 | 112 | .. autoclass:: analysis.SpectralSpreadAnalysis 113 | :members: 114 | 115 | Variance Analysis Class 116 | ~~~~~~~~~~~~~~~~~~~~~~~ 117 | 118 | .. 
def x_corr_time_lag(grain1, grain2):
    """
    Calculate the the time lag between two grains where grain2 is at maximum
    similarity to grain1
    """
    # The peak of the full cross-correlation marks the offset of maximum
    # similarity; express it relative to grain1's length.
    xcorr = np.correlate(grain1, grain2, mode="full")
    return np.argmax(xcorr) - grain1.size
def granulate_audio(
    input_audio,
    output_audio,
    stretch=1.5,
    window_size=1000,
    offset = 500,
    overlap = 250
):
    """
    Time-stretches audio using SOLA granulation

    Arguments:
    - input_audio: audio file object exposing read_frames() and seek().
    - output_audio: destination audio file object. NOTE(review): currently
      unused - nothing is ever written; see note below.
    - stretch: time-stretch ratio (presumably >1 lengthens the audio).
    - window_size: grain length in samples.
    - offset: hop between grain start points in samples.
    - overlap: intended crossfade length in samples (currently unused).

    NOTE(review): this implementation is incomplete - it only reads the
    input into overlapping grains and prints progress; the SOLA overlap-add
    and output writing are not implemented (time_shift is computed but
    never used).
    """
    input_grains = np.array([])
    offset_count = 0
    #Read audio into grains of set size with set offset
    while True:
        #Read as many full windows of audio as possible
        try:
            read_frames = input_audio.read_frames(window_size)
            offset_count += offset
            input_audio.seek(offset_count, 0)
            if not input_grains.size:
                # First grain: start the 2-D (grain, sample) array.
                input_grains = np.array([read_frames])
            else:
                input_grains = np.append(input_grains, [read_frames], axis = 0)
        except RuntimeError:
            # read_frames raises once a full window can no longer be read.
            break
    i = 0
    while i < input_grains.shape[0] - 1:
        print i
        i += 1
    print input_grains.shape


    #find the best overlap point for the x-fade by calculating the cross
    #correlation

    time_shift = int(round(offset * stretch))
16 | 17 | This descriptor calculates the zero-crossing rate for overlapping grains of 18 | an AnalysedAudioFile object. A full definition of zero-crossing analysis 19 | can be found in the documentation. 20 | 21 | Arguments: 22 | 23 | - analysis_group: the HDF5 file group to use for the storage of the 24 | analysis. 25 | 26 | - config: The configuration module used to configure the analysis 27 | """ 28 | 29 | def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None): 30 | super(ZeroXAnalysis, self).__init__(AnalysedAudioFile,frames, analysis_group, 'ZeroCrossing') 31 | self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name)) 32 | self.analysis_group = analysis_group 33 | self.logger.info("Creating zero crossing analysis for {0}".format(self.AnalysedAudioFile.name)) 34 | self.create_analysis(frames) 35 | 36 | @staticmethod 37 | def create_zerox_analysis( 38 | frames, 39 | window_size=512, 40 | overlapFac=0.5, 41 | *args, 42 | **kwargs 43 | ): 44 | """Generate zero crossing value for window of the signal""" 45 | if hasattr(frames, '__call__'): 46 | frames = frames() 47 | hopSize = int(window_size - np.floor(overlapFac * window_size)) 48 | 49 | # zeros at beginning (thus center of 1st window should be for sample nr. 0) 50 | samples = np.append(np.zeros(np.floor(window_size/2.0)), frames) 51 | 52 | # cols for windowing 53 | cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1 54 | # zeros at end (thus samples can be fully covered by frames) 55 | samples = np.append(samples, np.zeros(window_size)) 56 | 57 | # TODO: Better handeling of zeros based on previous sign would improve 58 | # accuracy. 59 | epsilon = np.finfo(float).eps 60 | samples[samples == 0.] 
+= epsilon 61 | 62 | frames = stride_tricks.as_strided( 63 | samples, 64 | shape=(cols, window_size), 65 | strides=(samples.strides[0]*hopSize, samples.strides[0]) 66 | ).copy() 67 | zero_crossing = np.sum(np.abs(np.diff(np.sign(frames))), axis=1) 68 | return zero_crossing 69 | 70 | @staticmethod 71 | def calc_zerox_frame_times(zerox_frames, sample_frames, samplerate): 72 | 73 | """Calculate times for frames using sample size and samplerate.""" 74 | 75 | if hasattr(sample_frames, '__call__'): 76 | sample_frames = sample_frames() 77 | # Get number of frames for time and frequency 78 | timebins = zerox_frames.shape[0] 79 | # Create array ranging from 0 to number of time frames 80 | scale = np.arange(timebins+1) 81 | # divide the number of samples by the total number of frames, then 82 | # multiply by the frame numbers. 83 | zerox_times = (sample_frames.shape[0]/timebins) * scale[:-1] 84 | # Divide by the samplerate to give times in seconds 85 | zerox_times = zerox_times / samplerate 86 | return zerox_times 87 | 88 | def hdf5_dataset_formatter(self, *args, **kwargs): 89 | ''' 90 | Formats the output from the analysis method to save to the HDF5 file. 
#!/usr/bin/env python

"""Command line interface for generating an analysed audio file database."""

import argparse
import audiofile
import logging
from fileops import loggerops
import pdb
import os
from database import AudioDatabase
import config
import __builtin__

filename = os.path.splitext(__file__)[0]
logger = loggerops.create_logger(log_filename='./{0}.log'.format(filename))

###########################################################################
# File open and closing monitoring: wrap the builtin file/open objects so
# every open and close is logged (Python 2 only - relies on __builtin__.file).
openfiles = set()
oldfile = __builtin__.file

class newfile(oldfile):
    # Logged drop-in replacement for the builtin file object.
    def __init__(self, *args):
        self.x = args[0]
        logger.debug("OPENING %s" % str(self.x))
        oldfile.__init__(self, *args)
        openfiles.add(self)

    def close(self):
        logger.debug("CLOSING %s" % str(self.x))
        oldfile.close(self)
        openfiles.remove(self)
oldopen = __builtin__.open
def newopen(*args):
    return newfile(*args)
__builtin__.file = newfile
__builtin__.open = newopen

def printOpenFiles():
    # Dump every file currently held open (debugging aid).
    logger.debug("%d OPEN FILES: [%s]" % (len(openfiles), ", ".join(f.x for f in openfiles)))

###########################################################################

def main():
    """Parse arguments then generate database."""
    logger.info('Started')
    # Description fixed: the database is generated at argument 2 from the
    # audio files found at argument 1.
    parser = argparse.ArgumentParser(
        description='Generate a database at argument 2 based on files in '
        'argument 1.'
    )
    parser.add_argument(
        'source',
        type=str,
        help='Directory of audio files to be added to the database'
    )
    parser.add_argument(
        'target',
        type=str,
        nargs='?',
        default='',
        help='Directory to generate the database in. If the directory does not'
        ' exist then it will be created if possible'
    )
    parser.add_argument(
        '--analyse',
        '-a',
        nargs='*',
        # Fixed missing space between 'rms' and 'f0' in the help text.
        help='Specify analyses to be created. Valid analyses are: \'rms\' '
        '\'f0\' \'atk\' \'fft\'',
        default=[
            "rms",
            "zerox",
            "fft",
            "spccntr",
            "spcsprd",
            "spcflux",
            "spccf",
            "spcflatness",
            "f0",
            "peak",
            "centroid",
            "variance",
            "kurtosis",
            "skewness"
        ]
    )
    parser.add_argument(
        '--rms',
        nargs='+',
        help='Specify arguments for creating RMS analyses'
    )
    parser.add_argument(
        '--atk',
        nargs='+',
        help='Specify arguments for creating attack analyses'
    )
    parser.add_argument(
        '--zerox',
        nargs='+',
        help='Specify arguments for creating zero-crossing analyses'
    )
    parser.add_argument(
        '--fft',
        nargs='+',
        # Fixed copy-pasted help text that wrongly said "zero-crossing".
        help='Specify arguments for creating FFT analyses'
    )
    parser.add_argument(
        "--reanalyse", action="store_true",
        help="Force re-analysis of all analyses, overwriting any existing "
        "analyses"
    )
    args = parser.parse_args()

    # Create database object
    database = AudioDatabase(
        args.source,
        args.target,
        analysis_list=args.analyse,
        config=config
    )
    # Create/load a pre-existing database
    database.load_database(reanalyse=args.reanalyse)

if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | This section provides installation instructions for installing the concatenator 5 | project on your system. There are alternative methods that will most likely 6 | work for installing both the project and many of it's dependencies, however 7 | the method shown below has been tested and is therefore the most reliable 8 | method for installing this project. 9 | 10 | Prerequesites 11 | ------------- 12 | 13 | Tested working on: 14 | 15 | - System: MacBook Air (11-inch, Mid 2011) 16 | 17 | - Operating System: OSX 10.11.4 or Ubuntu 14.04 18 | 19 | - Processor: 1.6 GHz Intel Core i5 20 | 21 | - Memory: 4 GB 1333 MHz DDR3 22 | 23 | There are a few dependencies required to install concatenator: 24 | 25 | 1. Python 2.7.11 (tested) 26 | 2. libsndfile - used for audio file IO 27 | 3. The HDF5 Library - used for large file storage 28 | 4. The Sox audio library - used for pitch shifting 29 | 30 | Getting this package 31 | ++++++++++++++++++++ 32 | 33 | This package can be downloaded with git using the following command: 34 | 35 | .. code:: bash 36 | 37 | git clone https://github.com/Pezz89/pysound 38 | 39 | This will clone the project folder into the current directory. 40 | 41 | Brew Python 42 | +++++++++++ 43 | There are a number of ways to install python. The simplest is through homebrew 44 | (OSX) / Linuxbrew (most Linux distributions) using the following command: 45 | 46 | .. 
code:: bash 47 | 48 | brew install python 49 | 50 | Homebrew can be installed by following installation instructions from here: 51 | 52 | http://brew.sh/ 53 | 54 | Linuxbrew can be installed by following installation instructions from here: 55 | 56 | https://github.com/Linuxbrew/linuxbrew 57 | 58 | Pyenv Python 59 | ++++++++++++ 60 | An alternative that allows greater flexibility is to use pyenv which allows for 61 | easy switching between python versions and guarantees the exact version needed: 62 | 63 | .. code:: bash 64 | 65 | brew install pyenv 66 | pyenv install 2.7.11 67 | pyenv global 2.7.11 68 | 69 | Note that the following may need to be added to your ~/.bashrc file to add 70 | pyenv pythons to your path. 71 | 72 | .. code:: bash 73 | 74 | if which pyenv > /dev/null; then eval "$(pyenv init -)"; fi 75 | export PYENV_ROOT="$HOME/.pyenv" 76 | export PATH="$PYENV_ROOT/bin:$PATH" 77 | eval "$(pyenv init -)" 78 | 79 | Other dependencies 80 | ++++++++++++++++++ 81 | 82 | libsndfile, sox and the HDF5 libraries can also be installed via homebrew/linuxbrew: 83 | 84 | .. code:: bash 85 | 86 | brew install libsndfile 87 | brew tap homebrew/science 88 | brew install hdf5 89 | brew install sox 90 | 91 | Python library and dependencies installation 92 | -------------------------------------------- 93 | 94 | The python package and its dependencies can then be easily installed by 95 | running the ./install.sh script from the root directory of the project. Note 96 | that this will install the project in its project folder. To check that the 97 | project is working correctly, simply run run_tests. 98 | 99 | .. code:: bash 100 | 101 | ./install.sh 102 | ./run_tests 103 | 104 | There is a small chance that the installation may fail when installing 105 | dependencies such as scipy or numpy. In these cases the packages must be 106 | installed manually. When this has been done, simply re-run the install.sh 107 | script.
108 | 109 | Jupyter Notebook Examples 110 | --------------------------------------- 111 | 112 | The Jupyter notebook application is required in order to run the interactive 113 | examples. It is recommended that this is installed as part of the iPython 114 | library using: 115 | 116 | .. code:: bash 117 | 118 | pip install "ipython[all]" 119 | 120 | Notebooks can then be viewed from the Examples folder of the project by 121 | running: 122 | 123 | .. code:: bash 124 | 125 | jupyter notebook 126 | 127 | This will open a notebook session in the browser. 128 | -------------------------------------------------------------------------------- /src/sppysound/run_matching.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import audiofile 5 | import logging 6 | from fileops import loggerops 7 | import pdb 8 | import os 9 | import __builtin__ 10 | import config 11 | from database import AudioDatabase, Matcher 12 | pdb.pm 13 | 14 | import sys 15 | modpath = sys.argv[0] 16 | modpath = os.path.splitext(modpath)[0]+'.log' 17 | 18 | logger = loggerops.create_logger(log_filename=modpath) 19 | 20 | ''' 21 | ########################################################################### 22 | # File open and closing monitoring 23 | openfiles = set() 24 | oldfile = __builtin__.file 25 | 26 | class newfile(oldfile): 27 | def __init__(self, *args): 28 | self.x = args[0] 29 | logger.debug("OPENING %s" % str(self.x)) 30 | oldfile.__init__(self, *args) 31 | openfiles.add(self) 32 | 33 | def close(self): 34 | logger.debug("CLOSING %s" % str(self.x)) 35 | oldfile.close(self) 36 | openfiles.remove(self) 37 | oldopen = __builtin__.open 38 | def newopen(*args): 39 | return newfile(*args) 40 | __builtin__.file = newfile 41 | __builtin__.open = newopen 42 | 43 | def printOpenFiles(): 44 | logger.debug("%d OPEN FILES: [%s]" % (len(openfiles), ", ".join(f.x for f in openfiles))) 45 | 46 | 
########################################################################### 47 | ''' 48 | 49 | def main(): 50 | """Parse arguments then generate database.""" 51 | logger.info('Started') 52 | parser = argparse.ArgumentParser( 53 | description='Generate a database at argument 1 based on files in ' 54 | 'argument 2.' 55 | ) 56 | parser.add_argument( 57 | 'source', 58 | type=str, 59 | help='Source database directory' 60 | ) 61 | parser.add_argument( 62 | 'target', 63 | type=str, 64 | help='Target database directory' 65 | ) 66 | parser.add_argument( 67 | 'output', 68 | type=str, 69 | help='output database directory' 70 | ) 71 | parser.add_argument( 72 | '--analyse', 73 | '-a', 74 | nargs='*', 75 | help='Specify analyses to be used. Valid analyses are: \'rms\'' 76 | '\'f0\' \'fft\'', 77 | default=[ 78 | "rms", 79 | "zerox", 80 | "fft", 81 | "spccntr", 82 | "spcsprd", 83 | "spcflux", 84 | "spccf", 85 | "spcflatness", 86 | "f0", 87 | "peak", 88 | "centroid", 89 | "kurtosis", 90 | "variance", 91 | "skewness", 92 | "harm_ratio" 93 | ] 94 | ) 95 | parser.add_argument( 96 | "--rematch", action="store_true", 97 | help="Force re-matching, overwriting any existing match data " 98 | ) 99 | args = parser.parse_args() 100 | source_db = AudioDatabase( 101 | args.source, 102 | analysis_list=args.analyse, 103 | config=config 104 | ) 105 | # Create/load a pre-existing database 106 | source_db.load_database(reanalyse=False) 107 | 108 | target_db = AudioDatabase( 109 | args.target, 110 | analysis_list=args.analyse, 111 | config=config 112 | ) 113 | 114 | # Create/load a pre-existing database 115 | target_db.load_database(reanalyse=False) 116 | 117 | output_db = AudioDatabase( 118 | args.output, 119 | config=config 120 | ) 121 | 122 | # Create/load a pre-existing database 123 | output_db.load_database(reanalyse=False) 124 | 125 | 126 | matcher = Matcher( 127 | source_db, 128 | target_db, 129 | config.analysis_dict, 130 | output_db=output_db, 131 | config=config, 132 | 
quantity=config.matcher["match_quantity"], 133 | rematch=args.rematch 134 | ) 135 | matcher.match(matcher.brute_force_matcher, grain_size=config.matcher["grain_size"], overlap=config.matcher["overlap"]) 136 | 137 | #matcher.match(matcher.k_nearest_neighbour_matching, grain_size=config.matcher["grain_size"], overlap=config.matcher["overlap"]) 138 | 139 | if __name__ == "__main__": 140 | main() 141 | -------------------------------------------------------------------------------- /src/sppysound/analysis/PeakAnalysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import numpy as np 4 | import logging 5 | from scipy import signal 6 | from numpy.lib import stride_tricks 7 | import pdb 8 | 9 | 10 | from fileops import pathops 11 | 12 | from Analysis import Analysis 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class PeakAnalysis(Analysis): 18 | 19 | """ 20 | Peak descriptor class for generation of per-grain maximum peak audio analysis. 21 | 22 | This descriptor calculates the maximum peak for overlapping grains of an 23 | AnalysedAudioFile object. A full definition of peak analysis can be found in 24 | the documentation. 25 | 26 | Arguments: 27 | 28 | - analysis_group: the HDF5 file group to use for the storage of the 29 | analysis. 
30 | 31 | - config: The configuration module used to configure the analysis 32 | """ 33 | 34 | def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None): 35 | super(PeakAnalysis, self).__init__(AnalysedAudioFile,frames, analysis_group, 'Peak') 36 | self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name)) 37 | # Store reference to the file to be analysed 38 | self.AnalysedAudioFile = AnalysedAudioFile 39 | 40 | self.analysis_group = analysis_group 41 | self.logger.info("Creating Peak analysis for {0}".format(self.AnalysedAudioFile.name)) 42 | self.create_analysis(frames) 43 | 44 | @staticmethod 45 | def create_peak_analysis(frames, window_size=512, 46 | window=signal.triang, 47 | overlapFac=0.5): 48 | """ 49 | Calculate the Peak values of windowed segments of the audio file and 50 | save to disk. 51 | """ 52 | if hasattr(frames, '__call__'): 53 | frames = frames() 54 | # Calculate the period of the window in hz 55 | # lowest_freq = 1.0 / window_size 56 | # Filter frequencies lower than the period of the window 57 | # filter = ButterFilter() 58 | # filter.design_butter(lowest_freq, self.AnalysedAudioFile.samplerate) 59 | # TODO: Fix filter 60 | # frames = filter.filter_butter(frames) 61 | 62 | # Generate a window function to apply to peak windows before analysis 63 | hopSize = int(window_size - np.floor(overlapFac * window_size)) 64 | 65 | # zeros at beginning (thus center of 1st window should be for sample nr. 
0) 66 | samples = np.append(np.zeros(np.floor(window_size/2.0)), frames) 67 | 68 | # cols for windowing 69 | cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1 70 | # zeros at end (thus samples can be fully covered by frames) 71 | samples = np.append(samples, np.zeros(window_size)) 72 | 73 | frames = stride_tricks.as_strided( 74 | samples, 75 | shape=(cols, window_size), 76 | strides=(samples.strides[0]*hopSize, samples.strides[0]) 77 | ).copy() 78 | 79 | peak = np.max(np.abs(frames), axis=1) 80 | 81 | return peak 82 | 83 | def hdf5_dataset_formatter(self, *args, **kwargs): 84 | ''' 85 | Formats the output from the analysis method to save to the HDF5 file. 86 | ''' 87 | samplerate = self.AnalysedAudioFile.samplerate 88 | peak = self.create_peak_analysis(*args, **kwargs) 89 | peak_times = self.calc_peak_frame_times(peak, args[0], samplerate) 90 | return ({'frames': peak, 'times': peak_times}, {}) 91 | 92 | @staticmethod 93 | def calc_peak_frame_times(peakframes, sample_frames, samplerate): 94 | 95 | """Calculate times for frames using sample size and samplerate.""" 96 | 97 | if hasattr(sample_frames, '__call__'): 98 | sample_frames = sample_frames() 99 | # Get number of frames for time and frequency 100 | timebins = peakframes.shape[0] 101 | # Create array ranging from 0 to number of time frames 102 | scale = np.arange(timebins+1) 103 | # divide the number of samples by the total number of frames, then 104 | # multiply by the frame numbers. 
105 | peak_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float) 106 | # Divide by the samplerate to give times in seconds 107 | peak_times = peak_times / samplerate 108 | return peak_times 109 | -------------------------------------------------------------------------------- /src/sppysound/analysis/SpectralCentroidAnalysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | import logging 4 | import pdb 5 | 6 | from Analysis import Analysis 7 | 8 | class SpectralCentroidAnalysis(Analysis): 9 | """ 10 | Spectral centroid descriptor class for generation of spectral centroid 11 | audio analysis. 12 | 13 | This descriptor calculates the spectral centroid for overlapping grains of 14 | an AnalysedAudioFile object. A full definition of spectral centroid 15 | analysis can be found in the documentation. 16 | 17 | Arguments: 18 | 19 | - analysis_group: the HDF5 file group to use for the storage of the 20 | analysis. 21 | 22 | - config: The configuration module used to configure the analysis 23 | """ 24 | 25 | def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None): 26 | super(SpectralCentroidAnalysis, self).__init__(AnalysedAudioFile,frames, analysis_group, 'SpcCntr') 27 | # Create logger for module 28 | self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name)) 29 | # Store reference to the file to be analysed 30 | self.AnalysedAudioFile = AnalysedAudioFile 31 | self.nyquist_rate = self.AnalysedAudioFile.samplerate / 2. 
32 | try: 33 | fft = self.AnalysedAudioFile.analyses["fft"] 34 | except KeyError: 35 | raise KeyError("FFT analysis is required for spectral spread " 36 | "analysis.") 37 | 38 | self.analysis_group = analysis_group 39 | self.logger.info("Creating Spectral Centroid analysis for {0}".format(self.AnalysedAudioFile.name)) 40 | self.create_analysis( 41 | self.create_spccntr_analysis, 42 | fft.analysis['frames'], 43 | self.AnalysedAudioFile.samplerate 44 | ) 45 | self.spccntr_window_count = None 46 | 47 | def hdf5_dataset_formatter(self, analysis_method, *args, **kwargs): 48 | ''' 49 | Formats the output from the analysis method to save to the HDF5 file. 50 | ''' 51 | samplerate = self.AnalysedAudioFile.samplerate 52 | output = self.create_spccntr_analysis(*args, **kwargs) 53 | times = self.calc_spccntr_frame_times(output, self.AnalysedAudioFile.frames, samplerate) 54 | return ({'frames': output, 'times': times}, {}) 55 | 56 | @staticmethod 57 | def create_spccntr_analysis(fft, samplerate, output_format="ind"): 58 | ''' 59 | Calculate the spectral centroid of the fft frames. 60 | 61 | samplerate: the samplerate of the audio analysed. 62 | output_format = Choose either "freq" for output in Hz or "ind" for bin 63 | index output 64 | ''' 65 | fft = fft[:] 66 | # Get the positive magnitudes of each bin. 67 | magnitudes = np.abs(fft) 68 | # Get the highest magnitude. 69 | mag_max = np.max(magnitudes) 70 | if not mag_max: 71 | y = np.empty(magnitudes.shape[0]) 72 | y.fill(np.nan) 73 | return y 74 | # Calculate the centre frequency of each rfft bin. 
75 | if output_format == "freq": 76 | freqs = np.fft.rfftfreq((np.size(fft, axis=1)*2)-1, 1.0/samplerate) 77 | elif output_format == "ind": 78 | freqs = np.arange(np.size(fft, axis=1)) 79 | else: 80 | raise ValueError("\'{0}\' is not a valid output " 81 | "format.".format(output_format)) 82 | # Calculate the weighted mean 83 | y = np.sum(magnitudes*freqs, axis=1) / (np.sum(magnitudes, axis=1)) 84 | 85 | return y 86 | 87 | @staticmethod 88 | def calc_spccntr_frame_times(spccntr_frames, sample_frame_count, samplerate): 89 | 90 | """Calculate times for frames using sample size and samplerate.""" 91 | 92 | # Get number of frames for time and frequency 93 | timebins = spccntr_frames.shape[0] 94 | # Create array ranging from 0 to number of time frames 95 | scale = np.arange(timebins+1) 96 | # divide the number of samples by the total number of frames, then 97 | # multiply by the frame numbers. 98 | spccntr_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float) 99 | # Divide by the samplerate to give times in seconds 100 | spccntr_times = spccntr_times / samplerate 101 | return spccntr_times 102 | 103 | -------------------------------------------------------------------------------- /src/sppysound/Examples/Database analysis example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Creating a database of analysed audio files\n", 8 | "Database objects are used to group audio files and their analyses into a single object in order to perform further operations (such as matching).\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": { 15 | "collapsed": false 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "from sppysound.database import AudioDatabase\n", 20 | "import analysis_config" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 
| "Specify the directory to search recursively for audio files in." 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "file_dir = \"./ExampleFiles\"" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "Specify the directory to generate the database in." 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "database_dir = \"./ExampleDatabase\"" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "Create a list of analysis trings that determine the descriptors to be generated by the object" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 5, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "analysis_list = [\"rms\", \"f0\"]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 7, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "database = AudioDatabase(file_dir, database_dir, analysis_list=analysis_list, config=analysis_config)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "The load_database method will search for any pre-existing analyses and load these, aswell as generating new analyses that aren't already present. These will be organized and stored in the database directory in \"data\" and \"audio\" sub-directories." 
93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 8, 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "database.load_database(reanalyse=False)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "Once analysed, the database object can be used with objects such as the matcher object it's entries to other databases." 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "A \"config.py\" file is required to be used as a configuration module for the database. This will contain configurations for initialising audio files contained in the database. The example config.py looks like this:\n", 118 | "\n", 119 | "~~~python\n", 120 | "rms = {\n", 121 | " \"window_size\": 100,\n", 122 | " \"overlap\": 2,\n", 123 | "}\n", 124 | "\n", 125 | "analysis_dict = {\n", 126 | " \"f0\": \"log2_median\",\n", 127 | " \"rms\": \"mean\"\n", 128 | "}\n", 129 | "\n", 130 | "analysis = {\n", 131 | " \"reanalyse\": False\n", 132 | "}\n", 133 | "\n", 134 | "output_file = {\n", 135 | " \"samplerate\": 44100,\n", 136 | " \"format\": 131075,\n", 137 | " \"channels\": 1\n", 138 | "}\n", 139 | "~~~" 140 | ] 141 | } 142 | ], 143 | "metadata": { 144 | "kernelspec": { 145 | "display_name": "Python 2", 146 | "language": "python", 147 | "name": "python2" 148 | }, 149 | "language_info": { 150 | "codemirror_mode": { 151 | "name": "ipython", 152 | "version": 2 153 | }, 154 | "file_extension": ".py", 155 | "mimetype": "text/x-python", 156 | "name": "python", 157 | "nbconvert_exporter": "python", 158 | "pygments_lexer": "ipython2", 159 | "version": "2.7.10" 160 | } 161 | }, 162 | "nbformat": 4, 163 | "nbformat_minor": 0 164 | } 165 | -------------------------------------------------------------------------------- /src/sppysound/docs/DatabaseAnalysisExample.ipynb: -------------------------------------------------------------------------------- 1 
| { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Creating a database of analysed audio files\n", 8 | "Database objects are used to group audio files and their analyses into a single object in order to perform further operations (such as matching).\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": { 15 | "collapsed": false 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "from sppysound.database import AudioDatabase\n", 20 | "import analysis_config" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "Specify the directory to search recursively for audio files in." 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "file_dir = \"./ExampleFiles\"" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "Specify the directory to generate the database in." 
46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "database_dir = \"./ExampleDatabase\"" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "Create a list of analysis trings that determine the descriptors to be generated by the object" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "analysis_list = [\"rms\", \"f0\"]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "database = AudioDatabase(\n", 86 | " file_dir, \n", 87 | " database_dir, \n", 88 | " analysis_list=analysis_list, \n", 89 | " config=analysis_config\n", 90 | ")" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "The load_database method will search for any pre-existing analyses and load these, aswell as generating new analyses that aren't already present. These will be organized and stored in the database directory in \"data\" and \"audio\" sub-directories." 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 6, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "database.load_database(reanalyse=True)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "Once analysed, the database object can be used with objects such as the matcher object it's entries to other databases." 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "A \"config.py\" file is required to be used as a configuration module for the database. This will contain configurations for initialising audio files contained in the database. 
class F0HarmRatioAnalysis(Analysis):

    """
    Placeholder analysis class giving access to the harmonic ratio generated
    by the F0 analysis.

    Because the harmonic ratio is computed as part of the F0 analysis, this
    class does not have its own "create analysis" method as other analyses
    do; it is designed purely for retrieval of the F0 harmonic-ratio frames
    during matching.

    F0 analysis must already have been generated for the AnalysedAudioFile in
    order to use this object.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(F0HarmRatioAnalysis, self).__init__(
            AnalysedAudioFile, frames, analysis_group, 'F0HarmRatio')
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile

        # Frames whose harmonic ratio falls below this threshold are treated
        # as unpitched and masked to NaN during retrieval.
        if config:
            self.threshold = config.f0["ratio_threshold"]
        else:
            self.threshold = 0.

        self.analysis_group = analysis_group
        self.logger.info("Initialising F0HarmRatio analysis for {0}".format(self.AnalysedAudioFile.name))

    def get_analysis_grains(self, start, end):
        """
        Retrieve analysis frames for the period specified in start and end
        times (milliseconds). Arrays of start and end time pairs will produce
        an array of equivalent size containing frames for these times.
        """
        times = self.analysis_group["F0"]["times"][:]
        hr = self.analysis_group["F0"]["harmonic_ratio"][:]
        # Convert milliseconds to seconds to match the stored frame times.
        start = start / 1000
        end = end / 1000
        vtimes = times.reshape(-1, 1)

        # Mask out frames below the confidence threshold (operates on the
        # local copy read above, not the stored dataset).
        nan_inds = hr < self.threshold
        hr[nan_inds] = np.nan

        selection = np.transpose((vtimes >= start) & (vtimes <= end))
        if not selection.any():
            # No frame falls inside the requested window: fall back to the
            # two frames closest to the window's centre.
            frame_center = start + (end-start)/2.
            closest_frames = np.abs(vtimes-frame_center).argsort()[:2]
            selection[closest_frames] = True

        return ((hr, times), selection)

    @staticmethod
    def calc_F0HarmRatio_frame_times(F0HarmRatioframes, sample_frames, samplerate):
        """Calculate times for frames using sample size and samplerate."""
        # Frames may be supplied lazily as a callable.
        if hasattr(sample_frames, '__call__'):
            sample_frames = sample_frames()
        # Get number of frames for time and frequency
        timebins = F0HarmRatioframes.shape[0]
        # Frame indexes 0..timebins-1. (Replaces the original
        # arange(timebins+1)[:-1]; the redundant `samplerate *= 1` no-op has
        # also been removed.)
        scale = np.arange(timebins)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        F0HarmRatio_times = (sample_frames.shape[0]/timebins) * scale
        # Divide by the samplerate to give times in seconds
        F0HarmRatio_times = F0HarmRatio_times / samplerate
        return F0HarmRatio_times

    def analysis_formatter(self, data, selection, format):
        """Calculate the average analysis value of the grain using the match
        format specified ('mean', 'median', 'log2_mean' or 'log2_median')."""
        harm_ratio, times = data
        # Get indexes of all valid frames (that aren't nan)
        valid_inds = np.isfinite(harm_ratio)

        format_style_dict = {
            'mean': np.mean,
            'median': np.median,
            'log2_mean': self.log2_mean,
            'log2_median': self.log2_median,
        }

        if not selection.size:
            # TODO: Add warning here — an empty selection silently yields NaN.
            return np.nan

        output = np.apply_along_axis(
            self.formatter_func,
            1,
            selection,
            harm_ratio,
            valid_inds,
            formatter=format_style_dict[format]
        )

        return output
# Specify analysis parameters for root mean square analysis.
rms = {
    # Analysis window sizes can be changed for each analysis individually.
    # These do not need to match the grain size of the matcher or synthesis.
    "window_size": 100,
    "overlap": 4,
}

f0 = {
    "window_size": 4096,
    "overlap": 4,
    # Currently all frames below this ratio are discarded and left as silence.
    # Different databases will require different values for the best results.
    # Noisier databases will need lower values than more tonal databases.
    "ratio_threshold": 0.81
}

# Specify analysis parameters for variance analysis.
variance = {
    "window_size": 100,
    "overlap": 4
}

# Specify analysis parameters for temporal kurtosis analysis.
kurtosis = {
    "window_size": 100,
    "overlap": 4
}

# Specify analysis parameters for temporal skewness analysis.
skewness = {
    "window_size": 100,
    "overlap": 4
}

# Specify analysis parameters for FFT analysis.
fft = {
    # The FFT window size determines the window size for all spectral analyses.
    "window_size": 4096
}

database = {
    # Enables creation of symbolic links to files not in the database rather
    # than making physical copies.
    "symlink": True
}

# Sets the weighting for each analysis. A higher weighting gives an analysis
# higher precedence when finding the best matches.
matcher_weightings = {
    "f0" : 8,
    "spccntr" : 1.,
    "spcsprd" : 1.,
    "spcflux" : 3.,
    "spccf" : 3.,
    "spcflatness": 3.,
    "zerox" : 1.,
    "rms" : 0.1,
    "peak": 0.1,
    "centroid": 0.5,
    "kurtosis": 2.,
    "skewness": 2.,
    "variance": 0.,
    "harm_ratio": 2
}

# Specifies the method for averaging analysis frames to create a single value
# for comparing to other grains. Possible formatters are: 'mean', 'median',
# 'log2_mean', 'log2_median'
analysis_dict = {
    # log2_median formats using the mel scale. This is useful for analyses
    # such as F0.
    "f0": "median",
    "rms": "mean",
    "zerox": "mean",
    "spccntr": "median",
    "spcsprd": "median",
    "spcflux": "median",
    "spccf": "median",
    "spcflatness": "median",
    "peak": "mean",
    "centroid": "mean",
    "kurtosis": "mean",
    "skewness": "mean",
    "variance": "mean",
    "harm_ratio": "mean"
}

analysis = {
    # Force the deletion of any pre-existing analyses to create new ones. This
    # is needed for overwriting old analyses generated with different
    # parameters to the current ones.
    "reanalyse": False
}

matcher = {
    # Force the re-matching of analyses
    "rematch": False,
    # This value must be the same as the synthesis grain size to avoid the
    # speeding up or slowing down of the resulting file in relation to the
    # original.
    "grain_size": 100,
    "overlap": 4,
    # Defines the number of matches to keep for synthesis. Note that this must
    # also be specified in the synthesis config
    "match_quantity": 2,
    # Choose the algorithm used to perform matching. kdtree is recommended for
    # larger datasets.
    "method": 'kdtree'
}

synthesizer = {
    # Artificially scale the output grain by the difference in RMS values
    # between source and target.
    "enforce_intensity": True,
    # Specify the ratio limit that the grain can be scaled by.
    "enf_intensity_ratio_limit": 1000.,
    # Artificially modify the pitch by the difference in f0 values between
    # source and target.
    "enforce_f0": True,
    # Specify the ratio limit that the grain can be modified by.
    "enf_f0_ratio_limit": 1.,
    "grain_size": 100,
    "overlap": 4,
    # Normalize output: avoid clipping of the final output by scaling the
    # final frames.
    "normalize" : False,
    # Defines the number of potential grains to choose from matches when
    # synthesizing output.
    "match_quantity": 2
}

# Specifies the format for the output file. Changing this has not been tested
# so may produce errors/undesirable results.
output_file = {
    "samplerate": 44100,
    "format": 131075,
    "channels": 1
}
25 | kurtosis = { 26 | "window_size": 100, 27 | "overlap": 8 28 | } 29 | 30 | # Specify analysis parameters for temporal skewness analysis. 31 | skewness = { 32 | "window_size": 100, 33 | "overlap": 8 34 | } 35 | 36 | # Specify analysis parameters for FFT analysis. 37 | fft = { 38 | # The FFT window size determines the window size for all spectral analyses. 39 | "window_size": 4096 40 | } 41 | 42 | database = { 43 | # Enables creation of symbolic links to files not in the database rather 44 | # than making pysical copies. 45 | "symlink": True 46 | } 47 | 48 | # Sets the weighting for each analysis. a higher weighting gives an analysis 49 | # higher presendence when finding the best matches. 50 | matcher_weightings = { 51 | "f0" : 0.5, 52 | "spccntr" : 1., 53 | "spcsprd" : 1., 54 | "spcflux" : 3., 55 | "spccf" : 3., 56 | "spcflatness": 3., 57 | "zerox" : 1., 58 | "rms" : 0.1, 59 | "peak": 0.1, 60 | "centroid": 0.5, 61 | "kurtosis": 2., 62 | "skewness": 2., 63 | "variance": 0., 64 | "harm_ratio": 2 65 | } 66 | 67 | # Specifies the method for averaging analysis frames to create a single value 68 | # for comparing to other grains. Possible formatters are: 'mean', 'median', 69 | # 'log2_mean', 'log2_median' 70 | analysis_dict = { 71 | # log2_median formats using mel scale. This is useful for analyses such as 72 | # F0. 73 | "f0": "log2_median", 74 | "rms": "mean", 75 | "zerox": "mean", 76 | "spccntr": "median", 77 | "spcsprd": "median", 78 | "spcflux": "median", 79 | "spccf": "median", 80 | "spcflatness": "median", 81 | "peak": "mean", 82 | "centroid": "mean", 83 | "kurtosis": "mean", 84 | "skewness": "mean", 85 | "variance": "mean", 86 | "harm_ratio": "mean" 87 | } 88 | 89 | analysis = { 90 | # Force the deletion of any pre-existing analyses to create new ones. This 91 | # is needed for overwriting old analyses generated with different 92 | # parameters to the current ones. 
93 | "reanalyse": False 94 | } 95 | 96 | matcher = { 97 | # Force the re-matching of analyses 98 | "rematch": False, 99 | # This value must be the same as the synthesis grain size to avoid the 100 | # speeding up or slowing down of the resulting file in relation to the 101 | # original. 102 | "grain_size": 100, 103 | "overlap": 8, 104 | # Defines the number of matches to keep for synthesis. Note that this must 105 | # also be specified in the synthesis config 106 | "match_quantity": 5, 107 | # Choose the algorithm used to perform matching. kdtree is recommended for 108 | # larger datasets. 109 | "method": 'kdtree' 110 | } 111 | 112 | synthesizer = { 113 | # Artificially scale the output grain by the difference in RMS values 114 | # between source and target. 115 | "enforce_intensity": True, 116 | # Specify the ratio limit that is the grain can be scaled by. 117 | "enf_intensity_ratio_limit": 1000., 118 | # Artificially modify the pitch by the difference in f0 values between 119 | # source and target. 120 | "enforce_f0": True, 121 | # Specify the ratio limit that is the grain can be modified by. 122 | "enf_f0_ratio_limit": 10., 123 | "grain_size": 100, 124 | "overlap": 8, 125 | # Normalize output, avoid clipping of final output by scaling the final 126 | # frames. 127 | "normalize" : True, 128 | # Defines the number of potential grains to choose from matches when 129 | # synthesizing output. 130 | "match_quantity": 5 131 | } 132 | 133 | # Specifies the format for the output file. Changing this has not been tested 134 | # so may produce errors/undesirable results. 135 | output_file = { 136 | "samplerate": 44100, 137 | "format": 131075, 138 | "channels": 1 139 | } 140 | -------------------------------------------------------------------------------- /src/sppysound/docs/matching_config.py: -------------------------------------------------------------------------------- 1 | # Specify analysis parameters for root mean square analysis. 
2 | rms = { 3 | # Analysis window sizes can be changed for each analysis individually. 4 | # These do not need to match the grain size of the matcher or synthesis. 5 | "window_size": 100, 6 | "overlap": 8, 7 | } 8 | 9 | f0 = { 10 | "window_size": 4096, 11 | "overlap": 8, 12 | # Currently all frames below this ratio are digaurded and left as silence. 13 | # Different databases will require different values for the best results. 14 | # Noisier databases will need lower values than more tonal databases. 15 | "ratio_threshold": 0.45 16 | } 17 | 18 | # Specify analysis parameters for variance analysis. 19 | variance = { 20 | "window_size": 100, 21 | "overlap": 8 22 | } 23 | 24 | # Specify analysis parameters for temporal kurtosis analysis. 25 | kurtosis = { 26 | "window_size": 100, 27 | "overlap": 8 28 | } 29 | 30 | # Specify analysis parameters for temporal skewness analysis. 31 | skewness = { 32 | "window_size": 100, 33 | "overlap": 8 34 | } 35 | 36 | # Specify analysis parameters for FFT analysis. 37 | fft = { 38 | # The FFT window size determines the window size for all spectral analyses. 39 | "window_size": 4096 40 | } 41 | 42 | database = { 43 | # Enables creation of symbolic links to files not in the database rather 44 | # than making pysical copies. 45 | "symlink": True 46 | } 47 | 48 | # Sets the weighting for each analysis. a higher weighting gives an analysis 49 | # higher presendence when finding the best matches. 50 | matcher_weightings = { 51 | "f0" : 0.5, 52 | "spccntr" : 1., 53 | "spcsprd" : 1., 54 | "spcflux" : 3., 55 | "spccf" : 3., 56 | "spcflatness": 3., 57 | "zerox" : 1., 58 | "rms" : 0.1, 59 | "peak": 0.1, 60 | "centroid": 0.5, 61 | "kurtosis": 2., 62 | "skewness": 2., 63 | "variance": 0., 64 | "harm_ratio": 2 65 | } 66 | 67 | # Specifies the method for averaging analysis frames to create a single value 68 | # for comparing to other grains. 
Possible formatters are: 'mean', 'median', 69 | # 'log2_mean', 'log2_median' 70 | analysis_dict = { 71 | # log2_median formats using mel scale. This is useful for analyses such as 72 | # F0. 73 | "f0": "log2_median", 74 | "rms": "mean", 75 | "zerox": "mean", 76 | "spccntr": "median", 77 | "spcsprd": "median", 78 | "spcflux": "median", 79 | "spccf": "median", 80 | "spcflatness": "median", 81 | "peak": "mean", 82 | "centroid": "mean", 83 | "kurtosis": "mean", 84 | "skewness": "mean", 85 | "variance": "mean", 86 | "harm_ratio": "mean" 87 | } 88 | 89 | analysis = { 90 | # Force the deletion of any pre-existing analyses to create new ones. This 91 | # is needed for overwriting old analyses generated with different 92 | # parameters to the current ones. 93 | "reanalyse": False 94 | } 95 | 96 | matcher = { 97 | # Force the re-matching of analyses 98 | "rematch": False, 99 | # This value must be the same as the synthesis grain size to avoid the 100 | # speeding up or slowing down of the resulting file in relation to the 101 | # original. 102 | "grain_size": 100, 103 | "overlap": 8, 104 | # Defines the number of matches to keep for synthesis. Note that this must 105 | # also be specified in the synthesis config 106 | "match_quantity": 5, 107 | # Choose the algorithm used to perform matching. kdtree is recommended for 108 | # larger datasets. 109 | "method": 'kdtree' 110 | } 111 | 112 | synthesizer = { 113 | # Artificially scale the output grain by the difference in RMS values 114 | # between source and target. 115 | "enforce_intensity": True, 116 | # Specify the ratio limit that is the grain can be scaled by. 117 | "enf_intensity_ratio_limit": 1000., 118 | # Artificially modify the pitch by the difference in f0 values between 119 | # source and target. 120 | "enforce_f0": True, 121 | # Specify the ratio limit that is the grain can be modified by. 
122 | "enf_f0_ratio_limit": 10., 123 | "grain_size": 100, 124 | "overlap": 8, 125 | # Normalize output, avoid clipping of final output by scaling the final 126 | # frames. 127 | "normalize" : True, 128 | # Defines the number of potential grains to choose from matches when 129 | # synthesizing output. 130 | "match_quantity": 5 131 | } 132 | 133 | # Specifies the format for the output file. Changing this has not been tested 134 | # so may produce errors/undesirable results. 135 | output_file = { 136 | "samplerate": 44100, 137 | "format": 131075, 138 | "channels": 1 139 | } 140 | -------------------------------------------------------------------------------- /src/sppysound/docs/synthesis_config.py: -------------------------------------------------------------------------------- 1 | # Specify analysis parameters for root mean square analysis. 2 | rms = { 3 | # Analysis window sizes can be changed for each analysis individually. 4 | # These do not need to match the grain size of the matcher or synthesis. 5 | "window_size": 100, 6 | "overlap": 8, 7 | } 8 | 9 | f0 = { 10 | "window_size": 4096, 11 | "overlap": 8, 12 | # Currently all frames below this ratio are digaurded and left as silence. 13 | # Different databases will require different values for the best results. 14 | # Noisier databases will need lower values than more tonal databases. 15 | "ratio_threshold": 0.45 16 | } 17 | 18 | # Specify analysis parameters for variance analysis. 19 | variance = { 20 | "window_size": 100, 21 | "overlap": 8 22 | } 23 | 24 | # Specify analysis parameters for temporal kurtosis analysis. 25 | kurtosis = { 26 | "window_size": 100, 27 | "overlap": 8 28 | } 29 | 30 | # Specify analysis parameters for temporal skewness analysis. 31 | skewness = { 32 | "window_size": 100, 33 | "overlap": 8 34 | } 35 | 36 | # Specify analysis parameters for FFT analysis. 37 | fft = { 38 | # The FFT window size determines the window size for all spectral analyses. 
39 | "window_size": 4096 40 | } 41 | 42 | database = { 43 | # Enables creation of symbolic links to files not in the database rather 44 | # than making pysical copies. 45 | "symlink": True 46 | } 47 | 48 | # Sets the weighting for each analysis. a higher weighting gives an analysis 49 | # higher presendence when finding the best matches. 50 | matcher_weightings = { 51 | "f0" : 0.5, 52 | "spccntr" : 1., 53 | "spcsprd" : 1., 54 | "spcflux" : 3., 55 | "spccf" : 3., 56 | "spcflatness": 3., 57 | "zerox" : 1., 58 | "rms" : 0.1, 59 | "peak": 0.1, 60 | "centroid": 0.5, 61 | "kurtosis": 2., 62 | "skewness": 2., 63 | "variance": 0., 64 | "harm_ratio": 2 65 | } 66 | 67 | # Specifies the method for averaging analysis frames to create a single value 68 | # for comparing to other grains. Possible formatters are: 'mean', 'median', 69 | # 'log2_mean', 'log2_median' 70 | analysis_dict = { 71 | # log2_median formats using mel scale. This is useful for analyses such as 72 | # F0. 73 | "f0": "log2_median", 74 | "rms": "mean", 75 | "zerox": "mean", 76 | "spccntr": "median", 77 | "spcsprd": "median", 78 | "spcflux": "median", 79 | "spccf": "median", 80 | "spcflatness": "median", 81 | "peak": "mean", 82 | "centroid": "mean", 83 | "kurtosis": "mean", 84 | "skewness": "mean", 85 | "variance": "mean", 86 | "harm_ratio": "mean" 87 | } 88 | 89 | analysis = { 90 | # Force the deletion of any pre-existing analyses to create new ones. This 91 | # is needed for overwriting old analyses generated with different 92 | # parameters to the current ones. 93 | "reanalyse": False 94 | } 95 | 96 | matcher = { 97 | # Force the re-matching of analyses 98 | "rematch": False, 99 | # This value must be the same as the synthesis grain size to avoid the 100 | # speeding up or slowing down of the resulting file in relation to the 101 | # original. 102 | "grain_size": 100, 103 | "overlap": 8, 104 | # Defines the number of matches to keep for synthesis. 
Note that this must 105 | # also be specified in the synthesis config 106 | "match_quantity": 5, 107 | # Choose the algorithm used to perform matching. kdtree is recommended for 108 | # larger datasets. 109 | "method": 'kdtree' 110 | } 111 | 112 | synthesizer = { 113 | # Artificially scale the output grain by the difference in RMS values 114 | # between source and target. 115 | "enforce_intensity": True, 116 | # Specify the ratio limit that is the grain can be scaled by. 117 | "enf_intensity_ratio_limit": 1000., 118 | # Artificially modify the pitch by the difference in f0 values between 119 | # source and target. 120 | "enforce_f0": True, 121 | # Specify the ratio limit that is the grain can be modified by. 122 | "enf_f0_ratio_limit": 10., 123 | "grain_size": 100, 124 | "overlap": 8, 125 | # Normalize output, avoid clipping of final output by scaling the final 126 | # frames. 127 | "normalize" : True, 128 | # Defines the number of potential grains to choose from matches when 129 | # synthesizing output. 130 | "match_quantity": 5 131 | } 132 | 133 | # Specifies the format for the output file. Changing this has not been tested 134 | # so may produce errors/undesirable results. 
135 | output_file = { 136 | "samplerate": 44100, 137 | "format": 131075, 138 | "channels": 1 139 | } 140 | -------------------------------------------------------------------------------- /src/sppysound/analysis/CentroidAnalysis.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function, division 3 | import os 4 | import numpy as np 5 | import logging 6 | from scipy import signal 7 | from numpy.lib import stride_tricks 8 | import pdb 9 | 10 | 11 | from AnalysisTools import ButterFilter 12 | from fileops import pathops 13 | 14 | from Analysis import Analysis 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class CentroidAnalysis(Analysis): 20 | 21 | """ 22 | Temporal centroid descriptor class for generation of temporal centroid 23 | audio analysis. 24 | 25 | This descriptor calculates the temporal centroid for overlapping grains of 26 | an AnalysedAudioFile object. A full definition of temporal centroid 27 | analysis can be found in the documentation. 28 | 29 | Arguments: 30 | 31 | - analysis_group: the HDF5 file group to use for the storage of the 32 | analysis. 
33 | 34 | - config: The configuration module used to configure the analysis 35 | """ 36 | 37 | def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None): 38 | super(CentroidAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'Centroid') 39 | self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name)) 40 | # Store reference to the file to be analysed 41 | self.AnalysedAudioFile = AnalysedAudioFile 42 | 43 | self.analysis_group = analysis_group 44 | self.logger.info("Creating Centroid analysis for {0}".format(self.AnalysedAudioFile.name)) 45 | self.create_analysis(frames) 46 | 47 | @staticmethod 48 | def create_centroid_analysis(frames, window_size=512, 49 | window=signal.triang, 50 | overlapFac=0.5): 51 | """ 52 | Calculate the Centroid values of windowed segments of the audio file and 53 | save to disk. 54 | """ 55 | if hasattr(frames, '__call__'): 56 | frames = frames() 57 | 58 | # Calculate the period of the window in hz 59 | # lowest_freq = 1.0 / window_size 60 | # Filter frequencies lower than the period of the window 61 | # filter = ButterFilter() 62 | # filter.design_butter(lowest_freq, self.AnalysedAudioFile.samplerate) 63 | # TODO: Fix filter 64 | # frames = filter.filter_butter(frames) 65 | 66 | # Generate a window function to apply to centroid windows before analysis 67 | win = window(window_size) 68 | hopSize = int(window_size - np.floor(overlapFac * window_size)) 69 | 70 | # zeros at beginning (thus center of 1st window should be for sample nr. 
0) 71 | samples = np.append(np.zeros(np.floor(window_size/2.0)), frames) 72 | 73 | # cols for windowing 74 | cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1 75 | # zeros at end (thus samples can be fully covered by frames) 76 | samples = np.append(samples, np.zeros(window_size)) 77 | 78 | frames = stride_tricks.as_strided( 79 | samples, 80 | shape=(cols, window_size), 81 | strides=(samples.strides[0]*hopSize, samples.strides[0]) 82 | ).copy() 83 | 84 | frames *= win 85 | weighted_sum = np.sum((np.arange(frames.shape[1])+1) * frames, axis=1) 86 | 87 | centroid = weighted_sum / np.sum(frames, axis=1) 88 | 89 | return centroid 90 | 91 | def hdf5_dataset_formatter(self, *args, **kwargs): 92 | ''' 93 | Formats the output from the analysis method to save to the HDF5 file. 94 | ''' 95 | samplerate = self.AnalysedAudioFile.samplerate 96 | centroid = self.create_centroid_analysis(*args, **kwargs) 97 | centroid_times = self.calc_centroid_frame_times(centroid, args[0], samplerate) 98 | return ({'frames': centroid, 'times': centroid_times}, {}) 99 | 100 | @staticmethod 101 | def calc_centroid_frame_times(centroidframes, sample_frames, samplerate): 102 | 103 | """Calculate times for frames using sample size and samplerate.""" 104 | 105 | if hasattr(sample_frames, '__call__'): 106 | sample_frames = sample_frames() 107 | # Get number of frames for time and frequency 108 | timebins = centroidframes.shape[0] 109 | # Create array ranging from 0 to number of time frames 110 | scale = np.arange(timebins+1) 111 | # divide the number of samples by the total number of frames, then 112 | # multiply by the frame numbers. 
class VarianceAnalysis(Analysis):

    """
    Variance descriptor class for generation of variance audio analysis.

    This descriptor calculates the variance analysis for overlapping
    grains of an AnalysedAudioFile object. A full definition of variance
    analysis can be found in the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(VarianceAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'variance')
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile

        if config:
            # Configured window size is in milliseconds; convert to samples.
            self.window_size = config.variance["window_size"] * self.AnalysedAudioFile.samplerate / 1000
            self.overlap = 1. / config.variance["overlap"]
        else:
            # Defaults matching RMSAnalysis. Previously these attributes were
            # never set when config was None, raising AttributeError below.
            self.window_size = 512
            self.overlap = 0.5

        self.analysis_group = analysis_group
        self.logger.info("Creating variance analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(frames, self.window_size, overlapFac=self.overlap)

    @staticmethod
    def create_variance_analysis(frames, window_size=512,
                                 overlapFac=0.5):
        """
        Generate an energy contour analysis.

        Calculate the variance values of windowed segments of the audio file.

        frames: 1D array of samples, or a callable returning one.
        window_size: analysis window length in samples.
        overlapFac: fraction of overlap between consecutive windows.

        Returns a 1D array with one variance value per window.
        """
        # TODO: filter frequencies below the period of the window
        # (ButterFilter) — previously disabled pending a fix.

        # Frames may be supplied lazily as a callable.
        if hasattr(frames, '__call__'):
            frames = frames()

        # Array sizes must be ints: np.floor/np.ceil return floats, which
        # modern NumPy rejects for np.zeros and as_strided shapes.
        window_size = int(window_size)
        hopSize = int(window_size - np.floor(overlapFac * window_size))

        # zeros at beginning (thus center of 1st window should be for sample
        # nr. 0)
        samples = np.append(np.zeros(int(np.floor(window_size / 2.0))), frames)

        # cols for windowing
        cols = int(np.ceil((len(samples) - window_size) / float(hopSize)) + 1)
        # zeros at end (thus samples can be fully covered by frames)
        samples = np.append(samples, np.zeros(window_size))

        # Overlapped 2D view of the signal: one analysis window per row.
        frames = stride_tricks.as_strided(
            samples,
            shape=(cols, window_size),
            strides=(samples.strides[0]*hopSize, samples.strides[0])
        ).copy()

        # Population variance of each window: mean squared deviation from the
        # per-window mean (true division is in effect via __future__).
        frame_mean = np.mean(frames, axis=1)
        variance = (1 / window_size) * np.sum((frames-np.vstack(frame_mean))**2, axis=1)

        return variance

    def hdf5_dataset_formatter(self, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': variance, 'times': variance_times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        variance = self.create_variance_analysis(*args, **kwargs)
        variance_times = self.calc_variance_frame_times(variance, args[0], samplerate)
        return ({'frames': variance, 'times': variance_times}, {})

    @staticmethod
    def calc_variance_frame_times(varianceframes, sample_frames, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # sample_frames may be supplied lazily as a callable.
        if hasattr(sample_frames, '__call__'):
            sample_frames = sample_frames()
        # Get number of analysis frames
        timebins = varianceframes.shape[0]
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        variance_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        variance_times = variance_times / samplerate
        return variance_times
class SpectralCrestFactorAnalysis(Analysis):
    """
    Spectral crest factor descriptor class for generation of spectral crest
    factor audio analysis.

    This descriptor calculates the spectral crest factor for overlapping
    grains of an AnalysedAudioFile object. A full definition can be found in
    the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis

    Raises KeyError if the prerequisite FFT analysis is not present.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(SpectralCrestFactorAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'SpcCrestFactor')
        # Create logger for module
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile
        self.nyquist_rate = self.AnalysedAudioFile.samplerate / 2.
        # The crest factor is derived from pre-computed FFT frames.
        try:
            fft = self.AnalysedAudioFile.analyses["fft"]
        except KeyError:
            raise KeyError("FFT analysis is required for spectral spread "
                           "analysis.")

        self.analysis_group = analysis_group
        self.logger.info("Creating Spectral CrestFactor analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(
            self.create_spccf_analysis,
            fft.analysis['frames'],
        )
        self.spccf_window_count = None

    def hdf5_dataset_formatter(self, analysis_method, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': output, 'times': times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        output = self.create_spccf_analysis(*args, **kwargs)
        times = self.calc_spccf_frame_times(output, self.AnalysedAudioFile.frames, samplerate)
        return ({'frames': output, 'times': times}, {})

    @staticmethod
    def create_spccf_analysis(fft):
        '''
        Calculate the spectral crest factor of the fft frames.

        Returns one value per frame: max bin magnitude / sum of magnitudes.
        Returns an all-nan array when the FFT is entirely zero.
        '''
        fft = fft[:]
        # Get the positive magnitudes of each bin.
        magnitudes = np.abs(fft)
        # Silent input: no peak exists, so mark every frame as nan.
        if not np.nonzero(magnitudes)[0].size:
            y = np.empty(magnitudes.shape[0])
            y.fill(np.nan)
            return y
        # Get the highest magnitude value for each spectral frame
        max_bins = np.max(magnitudes, axis=1)
        mag_sum = np.sum(magnitudes, axis=1)
        # Individual all-zero frames divide 0/0 -> nan; suppress the warning.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            spectral_cf = max_bins / mag_sum

        return spectral_cf

    @staticmethod
    def calc_spccf_frame_times(spccf_frames, sample_frame_count, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # Get number of analysis frames
        timebins = spccf_frames.shape[0]
        # Guard against empty analyses (consistent with SpectralFluxAnalysis).
        if not timebins:
            return np.array([])
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        spccf_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        spccf_times = spccf_times / samplerate
        return spccf_times

    def mean_formatter(self, data):
        """Calculate the mean value of the analysis data"""

        values = data[0]

        output = np.empty(len(values))
        for ind, i in enumerate(values):
            mean_i = np.mean(i)
            if mean_i == 0:
                output[ind] = np.nan
            else:
                # Log-scaled and normalised by the Nyquist rate.
                output[ind] = np.log10(np.mean(i))/self.nyquist_rate
        return output

    def median_formatter(self, data):
        """Calculate the median value of the analysis data"""
        values = data[0]

        # Size the output by the number of value groups, not the enclosing
        # tuple (was np.empty(len(data)) — a wrongly-sized array).
        output = np.empty(len(values))
        for ind, i in enumerate(values):
            median_i = np.median(i)
            if median_i == 0:
                output[ind] = np.nan
            else:
                output[ind] = np.log10(np.median(i))/self.nyquist_rate
        return output
class SpectralFlatnessAnalysis(Analysis):
    """
    Spectral flatness descriptor class for generation of spectral flatness
    audio analysis.

    This descriptor calculates the spectral flatness for overlapping grains
    of an AnalysedAudioFile object. A full definition can be found in the
    documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis

    Raises KeyError if the prerequisite FFT analysis is not present.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(SpectralFlatnessAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'SpcFlatness')
        # Create logger for module
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile
        self.nyquist_rate = self.AnalysedAudioFile.samplerate / 2.
        # Spectral flatness is derived from pre-computed FFT frames.
        try:
            fft = self.AnalysedAudioFile.analyses["fft"]
        except KeyError:
            raise KeyError("FFT analysis is required for spectral spread "
                           "analysis.")

        self.analysis_group = analysis_group
        self.logger.info("Creating Spectral Flatness analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(
            self.create_spcflatness_analysis,
            fft.analysis['frames'],
        )
        self.spcflatness_window_count = None

    def hdf5_dataset_formatter(self, analysis_method, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': output, 'times': times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        output = self.create_spcflatness_analysis(*args, **kwargs)
        times = self.calc_spcflatness_frame_times(output, self.AnalysedAudioFile.frames, samplerate)
        return ({'frames': output, 'times': times}, {})

    @staticmethod
    def create_spcflatness_analysis(fft):
        '''
        Calculate the spectral flatness of the fft frames.

        Flatness is the ratio of the geometric mean to the arithmetic mean of
        the bin magnitudes; returns one value per frame, or an all-nan array
        when the FFT is entirely zero.
        '''
        fft = fft[:]
        # Get the positive magnitudes of each bin.
        magnitudes = np.abs(fft)
        # Silent input: flatness is undefined, mark every frame as nan.
        if not np.nonzero(magnitudes)[0].size:
            y = np.empty(magnitudes.shape[0])
            y.fill(np.nan)
            return y

        # Zero bins produce log(0) = -inf; the resulting warnings are
        # expected, so suppress them for this calculation only.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            # Calculate the geometric mean of magnitudes
            geo_mean = np.e**np.mean(np.log(magnitudes), axis=1)
            # Calculate the arithmetic mean of magnitudes
            arith_mean = np.mean(magnitudes, axis=1)
            # Calculate the ratio between the two.
            spectral_flatness = geo_mean / arith_mean

        return spectral_flatness

    @staticmethod
    def calc_spcflatness_frame_times(spcflatness_frames, sample_frame_count, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # Get number of analysis frames
        timebins = spcflatness_frames.shape[0]
        # Guard against empty analyses (consistent with SpectralFluxAnalysis).
        if not timebins:
            return np.array([])
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        spcflatness_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        spcflatness_times = spcflatness_times / samplerate
        return spcflatness_times

    def mean_formatter(self, data):
        """Calculate the mean value of the analysis data"""

        values = data[0]

        output = np.empty(len(values))
        for ind, i in enumerate(values):
            mean_i = np.mean(i)
            if mean_i == 0:
                output[ind] = np.nan
            else:
                # Log-scaled and normalised by the Nyquist rate.
                output[ind] = np.log10(np.mean(i))/self.nyquist_rate
        return output

    def median_formatter(self, data):
        """Calculate the median value of the analysis data"""
        values = data[0]

        # Size the output by the number of value groups, not the enclosing
        # tuple (was np.empty(len(data)) — a wrongly-sized array).
        output = np.empty(len(values))
        for ind, i in enumerate(values):
            median_i = np.median(i)
            if median_i == 0:
                output[ind] = np.nan
            else:
                output[ind] = np.log10(np.median(i))/self.nyquist_rate
        return output
class SpectralFluxAnalysis(Analysis):
    """
    Spectral flux descriptor class for generation of spectral flux audio
    analysis.

    This descriptor calculates the spectral flux for overlapping grains of an
    AnalysedAudioFile object. A full definition of spectral flux analysis can
    be found in the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis

    Raises KeyError if the prerequisite FFT analysis is not present.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(SpectralFluxAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'SpcFlux')
        # Create logger for module
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile
        self.nyquist_rate = self.AnalysedAudioFile.samplerate / 2.
        # Spectral flux is derived from pre-computed FFT frames.
        try:
            fft = self.AnalysedAudioFile.analyses["fft"]
        except KeyError:
            raise KeyError("FFT analysis is required for spectral spread "
                           "analysis.")

        self.analysis_group = analysis_group
        self.logger.info("Creating Spectral Flux analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(
            self.create_spcflux_analysis,
            fft.analysis['frames'],
        )
        self.spcflux_window_count = None

    def hdf5_dataset_formatter(self, analysis_method, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': output, 'times': times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        output = self.create_spcflux_analysis(*args, **kwargs)
        times = self.calc_spcflux_frame_times(output, self.AnalysedAudioFile.frames, samplerate)
        return ({'frames': output, 'times': times}, {})

    @staticmethod
    def create_spcflux_analysis(fft):
        '''
        Calculate the spectral flux of the fft frames.

        Flux is the Euclidean distance between consecutive frames' magnitude
        spectra, normalised by the FFT length. Returns one value per frame
        transition (length N-1 for N frames), or an all-nan array of length N
        when the FFT is entirely zero.
        '''
        fft = fft[:]
        # Get the positive magnitudes of each bin.
        magnitudes = np.abs(fft)
        # Silent input: flux is undefined, mark every frame as nan.
        if not np.nonzero(magnitudes)[0].size:
            y = np.empty(magnitudes.shape[0])
            y.fill(np.nan)
            return y
        # Roll magnitudes as flux is calculated using the difference between
        # consecutive magnitudes. Rolling allows for quick access to previous
        # magnitude.
        rolled_mags = np.roll(magnitudes, 1, axis=0)[1:]
        sum_of_squares = np.sum((magnitudes[1:]-rolled_mags)**2., axis=1)
        spectral_flux = np.sqrt(sum_of_squares) / (np.size(fft, axis=1))

        return spectral_flux

    @staticmethod
    def calc_spcflux_frame_times(spcflux_frames, sample_frame_count, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # Get number of analysis frames
        timebins = spcflux_frames.shape[0]
        # Guard against empty analyses to avoid a zero division below.
        if not timebins:
            return np.array([])
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        spcflux_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        spcflux_times = spcflux_times / samplerate
        return spcflux_times

    def mean_formatter(self, data):
        """Calculate the mean value of the analysis data"""

        values = data[0]

        output = np.empty(len(values))
        for ind, i in enumerate(values):
            mean_i = np.mean(i)
            if mean_i == 0:
                output[ind] = np.nan
            else:
                # Log-scaled and normalised by the Nyquist rate.
                output[ind] = np.log10(np.mean(i))/self.nyquist_rate
        return output

    def median_formatter(self, data):
        """Calculate the median value of the analysis data"""
        values = data[0]

        # Size the output by the number of value groups, not the enclosing
        # tuple (was np.empty(len(data)) — a wrongly-sized array).
        output = np.empty(len(values))
        for ind, i in enumerate(values):
            median_i = np.median(i)
            if median_i == 0:
                output[ind] = np.nan
            else:
                output[ind] = np.log10(np.median(i))/self.nyquist_rate
        return output
class SkewnessAnalysis(Analysis):

    """
    Skewness descriptor class for generation of temporal skewness audio
    analysis.

    This descriptor calculates the temporal skewness for overlapping grains
    of an AnalysedAudioFile object. A full definition of skewness analysis
    can be found in the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis

    Raises KeyError if the prerequisite variance analysis is not present.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(SkewnessAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'skewness')
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile

        if config:
            # Configured window size is in milliseconds; convert to samples.
            self.window_size = config.skewness["window_size"] * self.AnalysedAudioFile.samplerate / 1000
            self.overlap = 1. / config.skewness["overlap"]
        else:
            # Defaults matching RMSAnalysis. Previously these attributes were
            # never set when config was None, raising AttributeError below.
            self.window_size = 512
            self.overlap = 0.5

        # Skewness is normalised by the pre-computed variance analysis.
        try:
            variance = self.AnalysedAudioFile.analyses["variance"]
        except KeyError:
            raise KeyError("Variance analysis is required for skewness "
                           "analysis.")

        self.analysis_group = analysis_group
        self.logger.info("Creating skewness analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(frames, variance.analysis['frames'][:], self.window_size, overlapFac=self.overlap)

    @staticmethod
    def create_skewness_analysis(
        frames,
        variance,
        window_size=512,
        window=signal.hanning,
        overlapFac=0.5
    ):
        """
        Calculate the skewness values of windowed segments of the audio file.

        frames: 1D array of samples, or a callable returning one.
        variance: per-window variance values (same windowing parameters).
        window_size: analysis window length in samples.
        window: window-function generator, or a falsy value for no window.
        overlapFac: fraction of overlap between consecutive windows.

        Returns a 1D array with one skewness value per window.
        """
        # NOTE(review): scipy.signal.hanning was removed in SciPy 1.13;
        # modern SciPy requires scipy.signal.windows.hann instead.
        # Frames may be supplied lazily as a callable.
        if hasattr(frames, '__call__'):
            frames = frames()

        # TODO: filter frequencies below the period of the window
        # (ButterFilter) — previously disabled pending a fix.

        # Array sizes must be ints: np.floor/np.ceil return floats, which
        # modern NumPy rejects for np.zeros and as_strided shapes.
        window_size = int(window_size)
        hopSize = int(window_size - np.floor(overlapFac * window_size))

        # zeros at beginning (thus center of 1st window should be for sample
        # nr. 0)
        samples = np.append(np.zeros(int(np.floor(window_size / 2.0))), frames)

        # cols for windowing
        cols = int(np.ceil((len(samples) - window_size) / float(hopSize)) + 1)
        # zeros at end (thus samples can be fully covered by frames)
        samples = np.append(samples, np.zeros(window_size))

        # Overlapped 2D view of the signal: one analysis window per row.
        frames = stride_tricks.as_strided(
            samples,
            shape=(cols, window_size),
            strides=(samples.strides[0]*hopSize, samples.strides[0])
        ).copy()

        if window:
            win = window(window_size)
            frames *= win

        frame_mean = np.mean(frames, axis=1)

        # The third standardised moment is normalised by sigma cubed.
        variance_cubed = np.sqrt(variance)**3

        a = ((1 / window_size)) * np.sum(((frames-np.vstack(frame_mean))**3), axis=1)
        skewness = a / variance_cubed

        return skewness

    def hdf5_dataset_formatter(self, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': skewness, 'times': skewness_times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        skewness = self.create_skewness_analysis(*args, **kwargs)
        skewness_times = self.calc_skewness_frame_times(skewness, args[0], samplerate)
        return ({'frames': skewness, 'times': skewness_times}, {})

    @staticmethod
    def calc_skewness_frame_times(skewnessframes, sample_frames, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # sample_frames may be supplied lazily as a callable.
        if hasattr(sample_frames, '__call__'):
            sample_frames = sample_frames()
        # Get number of analysis frames
        timebins = skewnessframes.shape[0]
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        skewness_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        skewness_times = skewness_times / samplerate
        return skewness_times
class KurtosisAnalysis(Analysis):

    """
    Kurtosis descriptor class for generation of kurtosis audio analysis.

    This descriptor calculates the temporal kurtosis for overlapping grains
    of an AnalysedAudioFile object. A full definition of kurtosis analysis
    can be found in the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis

    Raises KeyError if the prerequisite variance analysis is not present.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(KurtosisAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'kurtosis')
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile

        if config:
            # Configured window size is in milliseconds; convert to samples.
            self.window_size = config.kurtosis["window_size"] * self.AnalysedAudioFile.samplerate / 1000
            self.overlap = 1. / config.kurtosis["overlap"]
        else:
            # Defaults matching RMSAnalysis. Previously these attributes were
            # never set when config was None, raising AttributeError below.
            self.window_size = 512
            self.overlap = 0.5

        # Kurtosis is normalised by the pre-computed variance analysis.
        try:
            variance = self.AnalysedAudioFile.analyses["variance"]
        except KeyError:
            raise KeyError("Variance analysis is required for Kurtosis "
                           "analysis.")

        self.analysis_group = analysis_group
        self.logger.info("Creating kurtosis analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(frames, variance.analysis['frames'][:], self.window_size, overlapFac=self.overlap)

    @staticmethod
    def create_kurtosis_analysis(
        frames,
        variance,
        window_size=512,
        window=signal.hanning,
        overlapFac=0.5
    ):
        """
        Calculate the kurtosis values of windowed segments of the audio file.

        frames: 1D array of samples, or a callable returning one.
        variance: per-window variance values (same windowing parameters).
        window_size: analysis window length in samples.
        window: window-function generator, or a falsy value for no window.
        overlapFac: fraction of overlap between consecutive windows.

        Returns a 1D array of excess kurtosis values, one per window.
        """
        # NOTE(review): scipy.signal.hanning was removed in SciPy 1.13;
        # modern SciPy requires scipy.signal.windows.hann instead.
        # Frames may be supplied lazily as a callable.
        if hasattr(frames, '__call__'):
            frames = frames()

        # TODO: filter frequencies below the period of the window
        # (ButterFilter) — previously disabled pending a fix.

        # Array sizes must be ints: np.floor/np.ceil return floats, which
        # modern NumPy rejects for np.zeros and as_strided shapes.
        window_size = int(window_size)
        hopSize = int(window_size - np.floor(overlapFac * window_size))

        # zeros at beginning (thus center of 1st window should be for sample
        # nr. 0)
        samples = np.append(np.zeros(int(np.floor(window_size / 2.0))), frames)

        # cols for windowing
        cols = int(np.ceil((len(samples) - window_size) / float(hopSize)) + 1)
        # zeros at end (thus samples can be fully covered by frames)
        samples = np.append(samples, np.zeros(window_size))

        # Overlapped 2D view of the signal: one analysis window per row.
        frames = stride_tricks.as_strided(
            samples,
            shape=(cols, window_size),
            strides=(samples.strides[0]*hopSize, samples.strides[0])
        ).copy()

        if window:
            win = window(window_size)
            frames *= win

        frame_mean = np.mean(frames, axis=1)

        # The fourth standardised moment is normalised by variance squared.
        variance_sqrd = variance**2

        a = ((1 / window_size)) * np.sum(((frames-np.vstack(frame_mean))**4), axis=1)
        kurtosis = a / variance_sqrd
        # Subtract 3 so a Gaussian signal scores 0 (excess kurtosis).
        kurtosis -= 3

        return kurtosis

    def hdf5_dataset_formatter(self, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': kurtosis, 'times': kurtosis_times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        kurtosis = self.create_kurtosis_analysis(*args, **kwargs)
        kurtosis_times = self.calc_kurtosis_frame_times(kurtosis, args[0], samplerate)
        return ({'frames': kurtosis, 'times': kurtosis_times}, {})

    @staticmethod
    def calc_kurtosis_frame_times(kurtosisframes, sample_frames, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # sample_frames may be supplied lazily as a callable.
        if hasattr(sample_frames, '__call__'):
            sample_frames = sample_frames()
        # Get number of analysis frames
        timebins = kurtosisframes.shape[0]
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        kurtosis_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        kurtosis_times = kurtosis_times / samplerate
        return kurtosis_times
class RMSAnalysis(Analysis):

    """
    RMS descriptor class for generation of RMS audio analysis.

    This descriptor calculates the Root Mean Square analysis for overlapping
    grains of an AnalysedAudioFile object. A full definition of RMS analysis
    can be found in the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(RMSAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'RMS')
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile

        if config:
            # Configured window size is in milliseconds; convert to samples.
            self.window_size = config.rms["window_size"] * self.AnalysedAudioFile.samplerate / 1000
            self.overlap = 1. / config.rms["overlap"]
        else:
            self.window_size = 512
            self.overlap = 0.5

        self.analysis_group = analysis_group
        self.logger.info("Creating RMS analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(frames, self.AnalysedAudioFile.samplerate, window_size=self.window_size, overlapFac=self.overlap, )

    @staticmethod
    def create_rms_analysis(
        frames,
        samplerate,
        window_size=512,
        window=signal.hanning,
        overlapFac=0.5
    ):
        """
        Generate RMS contour analysis.

        Calculate the RMS values of windowed segments of the audio file.

        frames: 1D array of samples, or a callable returning one.
        samplerate: sample rate of the audio, used to design the filter.
        window_size: analysis window length in samples.
        window: window-function generator, or a falsy value for no window.
        overlapFac: fraction of overlap between consecutive windows.

        Returns a 1D array with one RMS value per window.
        """
        # NOTE(review): scipy.signal.hanning was removed in SciPy 1.13;
        # modern SciPy requires scipy.signal.windows.hann instead.
        # Frames may be supplied lazily as a callable.
        if hasattr(frames, '__call__'):
            frames = frames()

        # Renamed from "butter_lowpass": these helpers design/apply a
        # HIGHPASS (btype='highpass') to remove content below the window
        # period; the old names were misleading. Behaviour is unchanged.
        # Adapted from http://stackoverflow.com/questions/25191620/creating-lowpass-filter-in-scipy-understanding-methods-and-units
        def _butter_highpass(cutoff, fs, order=5):
            nyq = 0.5 * fs
            normal_cutoff = cutoff / nyq
            b, a = butter(order, normal_cutoff, btype='highpass', analog=False)
            return b, a

        def _butter_highpass_filter(data, cutoff, fs, order=5):
            b, a = _butter_highpass(cutoff, fs, order=order)
            return lfilter(b, a, data)

        # Remove frequencies below the period of the window (in Hz).
        lowest_freq = 1.0 / (window_size / samplerate)
        frames = _butter_highpass_filter(frames, lowest_freq, samplerate)

        # Array sizes must be ints: np.floor/np.ceil return floats, which
        # modern NumPy rejects for np.zeros and as_strided shapes.
        window_size = int(window_size)
        hopSize = int(window_size - np.floor(overlapFac * window_size))

        # zeros at beginning (thus center of 1st window should be for sample
        # nr. 0)
        samples = np.append(np.zeros(int(np.floor(window_size / 2.0))), frames)

        # cols for windowing
        cols = int(np.ceil((len(samples) - window_size) / float(hopSize)) + 1)
        # zeros at end (thus samples can be fully covered by frames)
        samples = np.append(samples, np.zeros(window_size))

        # Overlapped 2D view of the signal: one analysis window per row.
        frames = stride_tricks.as_strided(
            samples,
            shape=(cols, window_size),
            strides=(samples.strides[0]*hopSize, samples.strides[0])
        ).copy()

        if window:
            win = window(window_size)
            frames *= win
        # Root of the mean of squared sample values per window.
        rms = np.sqrt(np.mean(np.square(np.abs(frames)), axis=1))

        return rms

    def hdf5_dataset_formatter(self, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': rms, 'times': rms_times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        rms = self.create_rms_analysis(*args, **kwargs)
        rms_times = self.calc_rms_frame_times(rms, args[0], samplerate)
        return ({'frames': rms, 'times': rms_times}, {})

    @staticmethod
    def calc_rms_frame_times(rmsframes, sample_frames, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # sample_frames may be supplied lazily as a callable.
        if hasattr(sample_frames, '__call__'):
            sample_frames = sample_frames()
        # Get number of analysis frames
        timebins = rmsframes.shape[0]
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        rms_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        rms_times = rms_times / samplerate
        return rms_times
134 | rms_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float) 135 | # Divide by the samplerate to give times in seconds 136 | rms_times = rms_times / samplerate 137 | return rms_times 138 | -------------------------------------------------------------------------------- /src/sppysound/Examples/Database Matching Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 15, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sppysound.database import AudioDatabase, Matcher\n", 12 | "import matching_config" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 16, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "source_dir = \"./ExampleDatabase\"\n", 24 | "target_dir = \"./ExampleTarget\"\n", 25 | "output_dir = \"./ExampleOutput\"\n", 26 | "analysis_list = [\"rms\", \"f0\"]" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Load source and target databases for matching..." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 10, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "source_database = AudioDatabase(source_dir, analysis_list=analysis_list, config=matching_config)\n", 45 | "source_database.load_database(reanalyse=False)\n", 46 | "target_database = AudioDatabase(target_dir, analysis_list=analysis_list, config=matching_config)\n", 47 | "target_database.load_database(reanalyse=False)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "An output database must also be defined. 
This is to store matching results and synthesis results generated later.\n", 55 | "Note that an analysis list was not defined for this as it will not be analysed" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 11, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "output_database = AudioDatabase(output_dir, config=matching_config)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "The database must still be loaded to check for previous HDF5 files to use for results" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 12, 79 | "metadata": { 80 | "collapsed": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "output_database.load_database(reanalyse=False)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "A matcher object is then created using the loaded databases, ready to perform matching. The rematch argument can be set to discard any previously found matches from pre-existing HDF5 files, otherwise previously found matches will cause the program to terminate for their preservation." 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 13, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "matcher = Matcher(\n", 103 | " source_database,\n", 104 | " target_database,\n", 105 | " output_db=output_database,\n", 106 | " config=matching_config,\n", 107 | " rematch=True\n", 108 | ")" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "The matching is then run using the brute force matcher method. Other methods are not currently available.\n", 116 | "\n", 117 | "Warnings may be produced during this process. These will be silenced in a future revision but do not affect results." 
118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 14, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "matcher.match(\n", 129 | " matcher.brute_force_matcher,\n", 130 | ")" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "The output database will now contain a HDF5 file containing matching data for the two databases. This can be used to synthesize results." 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "The matching_config file for this demo is:\n", 145 | "~~~python\n", 146 | "rms = {\n", 147 | " \"window_size\": 100,\n", 148 | " \"overlap\": 2,\n", 149 | "}\n", 150 | "\n", 151 | "analysis_dict = {\n", 152 | " \"f0\": \"log2_median\",\n", 153 | " \"rms\": \"mean\"\n", 154 | "}\n", 155 | "\n", 156 | "matcher_weightings = {\n", 157 | " \"f0\" : 1.,\n", 158 | " \"rms\": 1.\n", 159 | "}\n", 160 | "\n", 161 | "analysis = {\n", 162 | " \"reanalyse\": False\n", 163 | "}\n", 164 | "\n", 165 | "matcher = {\n", 166 | " \"rematch\": False,\n", 167 | " \"grain_size\": 100,\n", 168 | " \"overlap\": 2,\n", 169 | " # Defines the number of matches to keep for synthesis.\n", 170 | " \"match_quantity\": 20\n", 171 | "}\n", 172 | "\n", 173 | "output_file = {\n", 174 | " \"samplerate\": 44100,\n", 175 | " \"format\": 131075,\n", 176 | " \"channels\": 1\n", 177 | "}\n", 178 | "~~~" 179 | ] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "Python 2", 185 | "language": "python", 186 | "name": "python2" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 2 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython2", 198 | "version": "2.7.10" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 0 203 | } 
204 | -------------------------------------------------------------------------------- /src/sppysound/analysis/SpectralSpreadAnalysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | import logging 4 | import pdb 5 | 6 | from Analysis import Analysis 7 | 8 | class SpectralSpreadAnalysis(Analysis): 9 | """ 10 | Spectral spread descriptor class for generation of spectral spread audio 11 | analysis. 12 | 13 | This descriptor calculates the spectral spread for overlapping grains of an 14 | AnalysedAudioFile object. A full definition of spectral spread analysis can 15 | be found in the documentation. 16 | 17 | Arguments: 18 | 19 | - analysis_group: the HDF5 file group to use for the storage of the 20 | analysis. 21 | 22 | - config: The configuration module used to configure the analysis 23 | """ 24 | 25 | def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None): 26 | super(SpectralSpreadAnalysis, self).__init__(AnalysedAudioFile,frames, analysis_group, 'SpcSprd') 27 | self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name)) 28 | # Store reference to the file to be analysed 29 | self.AnalysedAudioFile = AnalysedAudioFile 30 | self.nyquist_rate = self.AnalysedAudioFile.samplerate / 2. 
31 | try: 32 | spccntr = self.AnalysedAudioFile.analyses["spccntr"] 33 | except KeyError: 34 | raise KeyError("Spectral Centroid analysis is required for " 35 | "spectral spread analysis.") 36 | try: 37 | fft = self.AnalysedAudioFile.analyses["fft"] 38 | except KeyError: 39 | raise KeyError("FFT analysis is required for spectral spread " 40 | "analysis.") 41 | 42 | self.analysis_group = analysis_group 43 | self.logger.info("Creating Spectral Spread analysis for {0}".format(self.AnalysedAudioFile.name)) 44 | self.create_analysis( 45 | fft.analysis['frames'], 46 | spccntr.analysis['frames'], 47 | self.AnalysedAudioFile.samplerate 48 | ) 49 | self.spccntr_window_count = None 50 | 51 | def hdf5_dataset_formatter(self, *args, **kwargs): 52 | ''' 53 | Formats the output from the analysis method to save to the HDF5 file. 54 | ''' 55 | samplerate = self.AnalysedAudioFile.samplerate 56 | output = self.create_spcsprd_analysis(*args, **kwargs) 57 | times = self.calc_spcsprd_frame_times(output, self.AnalysedAudioFile.frames, samplerate) 58 | return ({'frames': output, 'times': times}, {}) 59 | 60 | @staticmethod 61 | def create_spcsprd_analysis(fft, spectral_centroid, samplerate, output_format = "ind"): 62 | ''' 63 | Calculate the spectral spread of the fft frames. 64 | 65 | fft: Real fft frames. 66 | spectral_centroid: spectral centroid frames (in index format). 67 | length: the length of the window used to calculate the FFT. 68 | samplerate: the samplerate of the audio analysed. 69 | ''' 70 | fft = fft[:] 71 | spectral_centroid = spectral_centroid[:] 72 | # Get the positive magnitudes of each bin. 
73 | magnitudes = np.abs(fft) 74 | mag_max = np.max(magnitudes) 75 | if not mag_max: 76 | y = np.empty(magnitudes.shape[0]) 77 | y.fill(np.nan) 78 | return y 79 | # Get the index for each bin 80 | if output_format == "ind": 81 | freqs = np.arange(np.size(fft, axis=1)) 82 | elif output_format == "freq": 83 | freqs = np.fft.rfftfreq((np.size(fft, axis=1)*2)-1, 1.0/samplerate) 84 | else: 85 | raise ValueError("\'{0}\' is not a valid output " 86 | "format.".format(output_format)) 87 | 88 | spectral_centroid = np.vstack(spectral_centroid) 89 | 90 | a = (freqs-spectral_centroid)**2 91 | mag_sqrd = magnitudes**2 92 | # Calculate the weighted mean 93 | y = np.sqrt(np.sum(a*mag_sqrd, axis=1) / (np.sum(mag_sqrd, axis=1))) 94 | 95 | return y 96 | 97 | @staticmethod 98 | def calc_spcsprd_frame_times(spcsprd_frames, sample_frame_count, samplerate): 99 | 100 | """Calculate times for frames using sample size and samplerate.""" 101 | 102 | # Get number of frames for time and frequency 103 | timebins = spcsprd_frames.shape[0] 104 | # Create array ranging from 0 to number of time frames 105 | scale = np.arange(timebins+1) 106 | # divide the number of samples by the total number of frames, then 107 | # multiply by the frame numbers. 
108 | spcsprd_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float) 109 | # Divide by the samplerate to give times in seconds 110 | spcsprd_times = spcsprd_times / samplerate 111 | return spcsprd_times 112 | 113 | def mean_formatter(self, data): 114 | """Calculate the mean value of the analysis data""" 115 | 116 | values = data[0] 117 | 118 | output = np.empty(len(values)) 119 | for ind, i in enumerate(values): 120 | mean_i = np.mean(i) 121 | if mean_i == 0: 122 | output[ind] = np.nan 123 | else: 124 | output[ind] = np.log10(np.mean(i))/self.nyquist_rate 125 | return output 126 | 127 | def median_formatter(self, data): 128 | """Calculate the median value of the analysis data""" 129 | values = data[0] 130 | 131 | output = np.empty(len(data)) 132 | for ind, i in enumerate(values): 133 | median_i = np.median(i) 134 | if median_i == 0: 135 | output[ind] = np.nan 136 | else: 137 | output[ind] = np.log10(np.median(i))/self.nyquist_rate 138 | return output 139 | -------------------------------------------------------------------------------- /src/sppysound/docs/DatabaseMatchingExample.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Matching a target sample to a database" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "from sppysound.database import AudioDatabase, Matcher\n", 19 | "import matching_config" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "source_dir = \"./ExampleDatabase\"\n", 31 | "target_dir = \"./ExampleTarget\"\n", 32 | "output_dir = \"./ExampleOutput\"\n", 33 | "analysis_list = [\"rms\", \"f0\"]" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | 
"metadata": {}, 39 | "source": [ 40 | "Load source and target databases for matching..." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "source_database = AudioDatabase(\n", 52 | " source_dir, \n", 53 | " analysis_list=analysis_list, \n", 54 | " config=matching_config\n", 55 | ")\n", 56 | "source_database.load_database(reanalyse=True)\n", 57 | "target_database = AudioDatabase(\n", 58 | " target_dir, \n", 59 | " analysis_list=analysis_list, \n", 60 | " config=matching_config\n", 61 | ")\n", 62 | "target_database.load_database(reanalyse=True)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "An output database must also be defined. This is to store matching results and synthesis results generated later.\n", 70 | "Note that an analysis list was not defined for this as it will not be analysed" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 4, 76 | "metadata": { 77 | "collapsed": false 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "output_database = AudioDatabase(output_dir, config=matching_config)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "The database must still be loaded to check for previous HDF5 files to use for results" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 5, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "output_database.load_database(reanalyse=False)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "A matcher object is then created using the loaded databases, ready to perform matching. The rematch argument can be set to discard any previously found matches from pre-existing HDF5 files, otherwise previously found matches will cause the program to terminate for their preservation." 
107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 6, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "matcher = Matcher(\n", 118 | " source_database,\n", 119 | " target_database,\n", 120 | " output_db=output_database,\n", 121 | " config=matching_config,\n", 122 | " rematch=True\n", 123 | ")" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "The matching is then run using the brute force matcher method. Other methods are not currently available.\n", 131 | "\n", 132 | "Warnings may be produced during this process. These will be silenced in a future revision but do not affect results." 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 7, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "matcher.match(\n", 144 | " matcher.kdtree_matcher,\n", 145 | ")" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "The output database will now contain a HDF5 file containing matching data for the two databases. This can be used to synthesize results." 
153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "The matching_config file for this demo is:\n", 160 | "\n", 161 | "~~~python\n", 162 | "rms = {\n", 163 | " \"window_size\": 100,\n", 164 | " \"overlap\": 2,\n", 165 | "}\n", 166 | "\n", 167 | "analysis_dict = {\n", 168 | " \"f0\": \"log2_median\",\n", 169 | " \"rms\": \"mean\"\n", 170 | "}\n", 171 | "\n", 172 | "matcher_weightings = {\n", 173 | " \"f0\" : 1.,\n", 174 | " \"rms\": 1.\n", 175 | "}\n", 176 | "\n", 177 | "analysis = {\n", 178 | " \"reanalyse\": False\n", 179 | "}\n", 180 | "\n", 181 | "matcher = {\n", 182 | " \"rematch\": False,\n", 183 | " \"grain_size\": 100,\n", 184 | " \"overlap\": 2,\n", 185 | " # Defines the number of matches to keep for synthesis.\n", 186 | " \"match_quantity\": 20\n", 187 | "}\n", 188 | "\n", 189 | "output_file = {\n", 190 | " \"samplerate\": 44100,\n", 191 | " \"format\": 131075,\n", 192 | " \"channels\": 1\n", 193 | "}\n", 194 | "~~~" 195 | ] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 2", 201 | "language": "python", 202 | "name": "python2" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 2 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython2", 214 | "version": "2.7.11" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 0 219 | } 220 | -------------------------------------------------------------------------------- /src/sppysound/Examples/Match Synthesis Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sppysound.database import AudioDatabase, Synthesizer\n", 12 | "import 
synthesis_config" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "source_dir = \"./ExampleDatabase\"\n", 24 | "target_dir = \"./ExampleTarget\"\n", 25 | "output_dir = \"./ExampleOutput\"" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "Load source database.\n", 33 | "Also load the F0, RMS and Peak analyses for use with amplitude and pitch enforcement." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "source_database = AudioDatabase(\n", 45 | " source_dir,\n", 46 | " config=synthesis_config,\n", 47 | " analysis_list={\"f0\", \"rms\", \"peak\"}\n", 48 | ")\n", 49 | "source_database.load_database(reanalyse=False)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "Load database used to generate matches to source database. \n", 57 | "This is used when enforcing analyses such as RMS and F0. (Original grains are needed to calculate the ratio to alter the synthesized grain by)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "target_database = AudioDatabase(\n", 69 | " target_dir,\n", 70 | " config=synthesis_config,\n", 71 | " analysis_list={\"f0\", \"rms\", \"peak\"}\n", 72 | ")\n", 73 | "target_database.load_database(reanalyse=False)\n", 74 | "\n", 75 | "output_database = AudioDatabase(\n", 76 | " output_dir,\n", 77 | " config=synthesis_config\n", 78 | ")\n", 79 | "output_database.load_database(reanalyse=False)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Initialise the synthesizer object used for generating the final output." 
87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "synthesizer = Synthesizer(source_database, output_database, target_db=target_database, config=synthesis_config)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "Run synthesis. As with the matching, warnings may be generated. These have all been accounted for and will be silenced in a future release. The output audio can now be found in the audio folder of ./ExampleOutput" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "metadata": { 111 | "collapsed": false, 112 | "scrolled": true 113 | }, 114 | "outputs": [ 115 | { 116 | "name": "stderr", 117 | "output_type": "stream", 118 | "text": [ 119 | "/Users/sam/PerryPerrySource/pysource/pysound/src/sppysound/audiofile.py:665: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.\n", 120 | " if self.times == None:\n", 121 | "/Users/sam/PerryPerrySource/pysource/pysound/src/sppysound/audiofile.py:297: UserWarning: write_frames::warning::audio data has been clipped while writing to file ./.shift_input.wav.\n", 122 | " return self.pysndfile_object.write_frames(input)\n", 123 | "/Users/sam/.pyenv/versions/2.7.10/lib/python2.7/site-packages/numpy/core/_methods.py:59: RuntimeWarning: Mean of empty slice.\n", 124 | " warnings.warn(\"Mean of empty slice.\", RuntimeWarning)\n", 125 | "/Users/sam/.pyenv/versions/2.7.10/lib/python2.7/site-packages/numpy/core/_methods.py:71: RuntimeWarning: invalid value encountered in double_scalars\n", 126 | " ret = ret.dtype.type(ret / rcount)\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "synthesizer.synthesize()" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "The synthesis_config.py file for this demo is:\n", 139 | "~~~python\n", 140 | 
"rms = {\n", 141 | " \"window_size\": 100,\n", 142 | " \"overlap\": 2,\n", 143 | "}\n", 144 | "\n", 145 | "analysis_dict = {\n", 146 | " \"f0\": \"log2_median\",\n", 147 | " \"rms\": \"mean\"\n", 148 | "}\n", 149 | "\n", 150 | "analysis = {\n", 151 | " \"reanalyse\": False\n", 152 | "}\n", 153 | "\n", 154 | "output_file = {\n", 155 | " \"samplerate\": 44100,\n", 156 | " \"format\": 131075,\n", 157 | " \"channels\": 1\n", 158 | "}\n", 159 | "\n", 160 | "synthesizer = {\n", 161 | " \"enforce_rms\": True,\n", 162 | " \"enf_rms_ratio_limit\": 5.,\n", 163 | " \"enforce_f0\": True,\n", 164 | " \"enf_f0_ratio_limit\": 10.,\n", 165 | " \"grain_size\": 100,\n", 166 | " \"overlap\": 2,\n", 167 | " \"normalize\" : True,\n", 168 | " # Defines the number of potential grains to choose from matches when\n", 169 | " # synthesizing output.\n", 170 | " \"match_quantity\": 20\n", 171 | "}\n", 172 | "~~~" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "collapsed": true 180 | }, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "Python 2", 188 | "language": "python", 189 | "name": "python2" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 2 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython2", 201 | "version": "2.7.10" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 0 206 | } 207 | -------------------------------------------------------------------------------- /src/sppysound/docs/MatchSynthesisExample.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Synthesizing output from matches" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | 
"execution_count": 4, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "from sppysound.database import AudioDatabase, Synthesizer, Matcher\n", 19 | "import synthesis_config\n", 20 | "import config" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 5, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "source_dir = \"./ExampleDatabase\"\n", 32 | "target_dir = \"./ExampleTarget\"\n", 33 | "output_dir = \"./ExampleOutput\"" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "Load source database.\n", 41 | "Also load the F0, RMS and Peak analyses for use with amplitude and pitch enforcement." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 6, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "source_database = AudioDatabase(\n", 53 | " source_dir,\n", 54 | " config=synthesis_config,\n", 55 | " analysis_list={\"f0\", \"rms\"}\n", 56 | ")\n", 57 | "source_database.load_database(reanalyse=True)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "Load database used to generate matches to source database. \n", 65 | "This is used when enforcing analyses such as RMS and F0. 
(Original grains are needed to calculate the ratio to alter the synthesized grain by)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 7, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "Traceback (most recent call last):\n", 80 | " File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/database.py\", line 157, in analyse_database\n", 81 | " config=self.config\n", 82 | " File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/audiofile.py\", line 943, in __enter__\n", 83 | " \"empty\".format(self.name))\n", 84 | "IOError: File isn't valid: ElectricGuitarSample-out_output.wav\n", 85 | "Check that file is mono and isn't empty\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "target_database = AudioDatabase(\n", 91 | " target_dir,\n", 92 | " config=synthesis_config,\n", 93 | " analysis_list={\"f0\", \"rms\"}\n", 94 | ")\n", 95 | "target_database.load_database(reanalyse=True)\n", 96 | "\n", 97 | "output_database = AudioDatabase(\n", 98 | " output_dir,\n", 99 | " config=config\n", 100 | ")\n", 101 | "output_database.load_database(reanalyse=False)\n", 102 | "\n", 103 | "matcher = Matcher(\n", 104 | " source_database,\n", 105 | " target_database,\n", 106 | " output_db=output_database,\n", 107 | " config=config,\n", 108 | " rematch=True\n", 109 | ")\n", 110 | "matcher.match(\n", 111 | " matcher.kdtree_matcher,\n", 112 | " grain_size=config.matcher[\"grain_size\"],\n", 113 | " overlap=config.matcher[\"overlap\"]\n", 114 | ")" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "Initialise the synthesizer object used for generating the final output." 
122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "synthesizer = Synthesizer(\n", 133 | " source_database, \n", 134 | " output_database, \n", 135 | " target_db=target_database, \n", 136 | " config=config\n", 137 | ")" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Run synthesis. As with the matching, warnings may be generated. These have all been accounted for and will be silenced in a future release. The output audio can now be found in the audio folder of ./ExampleOutput" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 9, 150 | "metadata": { 151 | "collapsed": false, 152 | "scrolled": true 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "synthesizer.synthesize(\n", 157 | " grain_size=config.synthesizer[\"grain_size\"],\n", 158 | " overlap=config.synthesizer[\"overlap\"]\n", 159 | ")" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "The synthesis_config.py file for this demo is:\n", 167 | "\n", 168 | "~~~python\n", 169 | "rms = {\n", 170 | " \"window_size\": 100,\n", 171 | " \"overlap\": 2,\n", 172 | "}\n", 173 | "\n", 174 | "analysis_dict = {\n", 175 | " \"f0\": \"log2_median\",\n", 176 | " \"rms\": \"mean\"\n", 177 | "}\n", 178 | "\n", 179 | "analysis = {\n", 180 | " \"reanalyse\": False\n", 181 | "}\n", 182 | "\n", 183 | "output_file = {\n", 184 | " \"samplerate\": 44100,\n", 185 | " \"format\": 131075,\n", 186 | " \"channels\": 1\n", 187 | "}\n", 188 | "\n", 189 | "synthesizer = {\n", 190 | " \"enforce_rms\": True,\n", 191 | " \"enf_rms_ratio_limit\": 5.,\n", 192 | " \"enforce_f0\": True,\n", 193 | " \"enf_f0_ratio_limit\": 10.,\n", 194 | " \"grain_size\": 100,\n", 195 | " \"overlap\": 2,\n", 196 | " \"normalize\" : True,\n", 197 | " # Defines the number of potential grains to choose from 
matches when\n", 198 | " # synthesizing output.\n", 199 | " \"match_quantity\": 20\n", 200 | "}\n", 201 | "~~~" 202 | ] 203 | } 204 | ], 205 | "metadata": { 206 | "kernelspec": { 207 | "display_name": "Python 2", 208 | "language": "python", 209 | "name": "python2" 210 | }, 211 | "language_info": { 212 | "codemirror_mode": { 213 | "name": "ipython", 214 | "version": 2 215 | }, 216 | "file_extension": ".py", 217 | "mimetype": "text/x-python", 218 | "name": "python", 219 | "nbconvert_exporter": "python", 220 | "pygments_lexer": "ipython2", 221 | "version": "2.7.11" 222 | } 223 | }, 224 | "nbformat": 4, 225 | "nbformat_minor": 0 226 | } 227 | -------------------------------------------------------------------------------- /src/sppysound/synthesis/synthesis_tools.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | from sppysound import AudioFile 4 | import matplotlib.pyplot as plt 5 | import pdb 6 | import scipy 7 | 8 | 9 | def convolve(input, impulse_response): 10 | out = np.zeros(len(input) + len(impulse_response) - 1) 11 | for input_ind, i in enumerate(input): 12 | for imp_ind, j in enumerate(impulse_response): 13 | out[input_ind+imp_ind] = out[input_ind+imp_ind] + i*j 14 | return out 15 | 16 | def moving_average_filter_recursive(input, M, symetry = 'after'): 17 | ''' 18 | Applies a moving average filter to the input. 19 | 20 | Arguments: 21 | input - the input signal to filter. 22 | symetry - ('before' or 'middle') defines how points will be 23 | averaged around the index 24 | M - the number of coefficients. 25 | ''' 26 | # Calculate the filter coefficients 27 | filter_kernal = np.ones(M) / M 28 | # Get the pre-zero-padded input size. 29 | input_size = input.size 30 | # Pad end of input with zeros. 
31 | if symetry == 'after': 32 | # Zero-pad input at end on input for averaging of end samples 33 | input = np.hstack((input, np.zeros(M))) 34 | elif symetry == 'middle': 35 | # M value must be odd to have an equal number of samples on each side. 36 | if not M % 2: 37 | raise ValueError("M must be odd for symetrical averaging") 38 | # Calculate the zero padding size. 39 | offset = np.floor(M/2.0) 40 | # Zero pad input on both sides to allow for averaging from first sample 41 | # to last sample 42 | input = np.hstack((np.zeros(offset), input, np.zeros(offset))) 43 | 44 | 45 | # Calculate the number of output samples. 46 | # y = np.zeros(input.size-M) 47 | 48 | y = np.zeros(input.size-M) 49 | # If averaging after first sample. 50 | if symetry == 'after': 51 | # For each sample in the input 52 | acc = 0 53 | 54 | i = 0 55 | while i < M: 56 | acc += input[i] 57 | i += 1 58 | y[0] = acc / M 59 | 60 | i = 1 61 | while i < input.size-M: 62 | acc += input[i+M-1] - input[i-1] 63 | y[i] = acc/M 64 | i += 1 65 | print(y) 66 | 67 | elif symetry == 'middle': 68 | # TODO: Make recursive 69 | i = 0 70 | # For all the input samples 71 | while i < input_size-offset: 72 | # The output sample is the average sample value for M samples. 73 | y[i] = np.sum(input[i:i+M] * filter_kernal) 74 | i += 1 75 | return y 76 | 77 | def moving_average_filter(input, M, symetry = 'after'): 78 | ''' 79 | Applies a moving average filter to the input. 80 | 81 | Arguments: 82 | input - the input signal to filter. 83 | symetry - ('before' or 'middle') defines how points will be 84 | averaged around the index 85 | M - the number of coefficients. 86 | ''' 87 | # Calculate the filter coefficients 88 | filter_kernal = np.ones(M) / M 89 | # Get the pre-zero-padded input size. 90 | input_size = input.size 91 | # Pad end of input with zeros. 
92 | if symetry == 'after': 93 | # Zero-pad input at end on input for averaging of end samples 94 | input = np.hstack((input, np.zeros(M))) 95 | elif symetry == 'middle': 96 | # M value must be odd to have an equal number of samples on each side. 97 | if not M % 2: 98 | raise ValueError("M must be odd for symetrical averaging") 99 | # Calculate the zero padding size. 100 | offset = np.floor(M/2.0) 101 | # Zero pad input on both sides to allow for averaging from first sample 102 | # to last sample 103 | input = np.hstack((np.zeros(offset), input, np.zeros(offset))) 104 | 105 | 106 | # Calculate the number of output samples. 107 | y = np.zeros(input.size-M) 108 | 109 | # If averaging after first sample. 110 | if symetry == 'after': 111 | i = 0 112 | # For each sample in the input 113 | while i < input_size: 114 | y[i] = np.sum(input[i:i+M] / M) 115 | i += 1 116 | # If averaging symetrically 117 | elif symetry == 'middle': 118 | i = 0 119 | # For all the input samples 120 | while i < input_size-offset: 121 | # The output sample is the average sample value for M samples. 122 | y[i] = np.sum(input[i:i+M] / M) 123 | i += 1 124 | return y 125 | 126 | def blackman_filter(input, window_size, freq): 127 | ''' 128 | Create a blackman windowed-sinc filter. 129 | 130 | freq - The cutoff frequency of the filter specified as a proportion of the 131 | samplerate of the signal. 132 | ''' 133 | # TODO: Check the definition of freq is correct. 134 | 135 | i = np.arange(window_size) 136 | # Create a sinc function of M length. 137 | # The output will be a sinc function shifted from -M/2 - M/2 to 0 - M. 138 | # This will result in a sinc function that can be used to create a filter 139 | # at the cutoff-frequency provided in freq. 
140 | sinc_kernal = np.sin(2*np.pi*freq*(i-window_size/2))/(i-window_size/2) 141 | 142 | # Create a blackman window 143 | window = 0.42 - 0.5 * np.cos(2 * np.pi * (i / window_size)) + 0.08 * np.cos(4 * np.pi * (i / window_size)) 144 | window_sinc = sinc_kernal * window 145 | 146 | # Number of samplepoints 147 | N = window_size 148 | # sample spacing 149 | T = 1.0 / 800.0 150 | yf = scipy.fftpack.fft(window_sinc) 151 | xf = np.linspace(0.0, 1.0/(2.0*T), N/2) 152 | 153 | 154 | plt.subplot(311) 155 | plt.title('Blackman Window') 156 | plt.plot(window) 157 | plt.ylabel('Amplitude') 158 | plt.xlabel('sample') 159 | plt.subplot(312) 160 | plt.title('Window sinc function') 161 | plt.plot(sinc_kernal) 162 | plt.subplot(313) 163 | plt.title('FFT') 164 | plt.plot(xf, 2.0/N * np.abs(yf[0:N/2])) 165 | plt.show() 166 | 167 | if __name__ == "__main__": 168 | ''' 169 | a = np.array([1, 0.5, 3, 1]) 170 | b = np.array([1, 0, 0, 0]) 171 | c = convolve(a, b) 172 | print(c) 173 | print(np.convolve(a, b)) 174 | ''' 175 | with AudioFile('./test_audio.aif', 'r') as test_audio: 176 | grain = test_audio.read_grain(0, -1) 177 | grain = np.arange(5000) 178 | filtered_grain = moving_average_filter(grain, 101) 179 | filtered_r_grain = moving_average_filter_recursive(grain, 101) 180 | 181 | blackman_filter(grain, 101, 0.14) 182 | 183 | ''' 184 | # Plot test wave 185 | plt.subplot(211) 186 | plt.title('Original Wave') 187 | plt.plot(grain) 188 | plt.ylabel('Amplitude') 189 | plt.xlabel('sample') 190 | plt.subplot(212) 191 | plt.title('Filtered Wave') 192 | plt.plot(filtered_grain) 193 | plt.ylabel('Amplitude') 194 | plt.xlabel('sample') 195 | plt.show() 196 | ''' 197 | -------------------------------------------------------------------------------- /src/sppysound/analysis/Analysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import numpy as np 4 | import logging 5 | import pdb 6 | 7 | from fileops 
class Analysis(object):

    """
    Basic descriptor class to build analyses on.

    The Analysis base class works as an interface between child descriptor
    objects and the HDF5 storage file. This is designed to separate descriptor
    generation from data IO, allowing for quick development of new descriptor
    classes. The base Analysis class has methods for retrieving analyses from
    file and saving data created by analysis objects to file. It also has basic
    formatting methods used to return data in the required format for processes
    such as descriptor comparisons.

    In order to create a new descriptor, the hdf5_dataset_formatter method will
    need to be overwritten by the child class to generate and store the
    descriptor's output in the appropriate manner. Examples of this can be seen
    through the currently implemented descriptors.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, name, config=None):
        """
        AnalysedAudioFile: audio file object to be analysed.
        frames: audio sample frames (used by sub-classes that need them).
        analysis_group: HDF5 group that analysis datasets are written to.
        name: name of this analysis; used as the HDF5 sub-group name.
        config: optional configuration object (unused by the base class).
        """
        # Create object logger
        self.logger = logging.getLogger(__name__ + '.{0}Analysis'.format(name))
        # Store AnalysedAudioFile object to be analysed.
        self.AnalysedAudioFile = AnalysedAudioFile
        self.analysis_group = analysis_group
        self.name = name

    def create_analysis(self, *args, **kwargs):
        """
        Create the analysis and save to the HDF5 file.

        Arguments are forwarded to hdf5_dataset_formatter, which runs the
        analysis and returns (data_dict, attrs_dict) for storage.
        """
        try:
            self.analysis = self.analysis_group.create_group(self.name)
        except ValueError:
            self.logger.info("{0} analysis group already exists".format(self.name))
            self.analysis = self.analysis_group[self.name]

        # If forcing new analysis creation then delete old analysis and create
        # a new one
        if self.AnalysedAudioFile.force_analysis:
            self.logger.info("Force re-analysis is enabled. "
                             "deleting: {0}".format(self.analysis.name))
            # Delete all pre-existing data in database. Snapshot the key list
            # first: the original iterated `iterkeys()` while deleting, which
            # mutates the group during iteration.
            for i in list(self.analysis.keys()):
                del self.analysis[i]
            self._generate_and_store(*args, **kwargs)
        else:
            if self.analysis.keys():
                self.logger.info("Analysis already exists. Reading from: "
                                 "{0}".format(self.analysis.name))
            else:
                # If it doesn't exist then generate a new analysis.
                self._generate_and_store(*args, **kwargs)

    def _generate_and_store(self, *args, **kwargs):
        """Run the analysis function and save its formatted output (datasets
        and attributes) to the HDF5 file. Factored out of create_analysis,
        which previously duplicated this code in both branches."""
        data_dict, attrs_dict = self.hdf5_dataset_formatter(*args, **kwargs)
        # .items() works under both Python 2 and 3 (original used iteritems).
        for key, value in data_dict.items():
            self.analysis.create_dataset(key, data=value, chunks=True)
        for key, value in attrs_dict.items():
            self.analysis.attrs[key] = value

    def get_analysis_grains(self, start, end):
        """
        Retrieve analysis frames for the period specified by start and end
        times (in milliseconds). Arrays of start and end time pairs will
        produce an array of equivalent size containing frames for these times.

        Returns (frames, selection): all stored frames plus a boolean
        selection mask of the frames whose analysis times fall in the range.
        """
        times = self.analysis_group[self.name]["times"][:]
        # Stored times are in seconds; convert ms -> s with float division
        # (the original `/ 1000` floor-divides integer input under Python 2,
        # since this module does not import division from __future__).
        start = start / 1000.
        end = end / 1000.
        vtimes = times.reshape(-1, 1)

        selection = np.transpose((vtimes >= start) & (vtimes <= end))
        # If there are no frames for this grain, take the two closest frames
        # from the adjacent grains.
        if not selection.any():
            frame_center = start + (end-start)/2.
            closest_frames = np.abs(vtimes-frame_center).argsort()[:2]
            selection[closest_frames] = True

        frames = self.analysis_group[self.name]["frames"][:]

        return (frames, selection)

    def hdf5_dataset_formatter(self, analysis_method, *args, **kwargs):
        '''
        Note: This is a generic formatter designed as a template to be
        overwritten by a descriptor sub-class.

        Formats the output from the analysis method to save to the HDF5 file.

        Places data and attributes in 2 dictionaries to be stored in the HDF5
        file.

        NOTE(review): the original signature omitted `self`, so a bound call
        bound the instance to `analysis_method` and always failed; in practice
        sub-classes override this method.
        '''
        output, attributes = analysis_method(*args, **kwargs)
        return ({'data': output}, {'attrs': attributes})

    ############################################################################
    # Formatting functions
    ############################################################################

    def log2_median(self, x):
        """Median of x mapped through a 1000-scaled log2 curve."""
        return np.median(1000 * np.log2(1+x/1000))

    def log2_mean(self, x):
        """Mean of x mapped through a 1000-scaled log2 curve."""
        return np.mean(1000 * np.log2(1+x/1000))

    def formatter_func(self, selection, frames, valid_inds, formatter=None):
        """Apply `formatter` to the valid frames selected for one grain."""
        # Get all valid frames from the current grain.
        frames = frames[selection & valid_inds]
        # NOTE(review): the original had a half-valid-frames -> NaN check
        # after this return, which was unreachable; the dead code has been
        # removed without changing behavior.
        return formatter(frames)

    def analysis_formatter(self, frames, selection, format):
        """Calculate the average analysis value of the grain using the match
        format specified ('mean', 'median', 'log2_mean' or 'log2_median')."""
        valid_inds = np.isfinite(frames)

        format_style_dict = {
            'mean': np.mean,
            'median': np.median,
            'log2_mean': self.log2_mean,
            'log2_median': self.log2_median,
        }

        if not selection.size:
            # TODO: Add warning here
            return np.nan

        # One row of `selection` per grain; reduce each row's frames with the
        # chosen formatter. (An unused np.empty pre-allocation was removed.)
        return np.apply_along_axis(
            self.formatter_func, 1, selection, frames, valid_inds,
            formatter=format_style_dict[format])
logger = logging.getLogger(__name__)
# NOTE(review): the original bound `logger` to the return value of
# addHandler(), which is None; attach the NullHandler separately so the
# module-level logger remains usable.
logger.addHandler(logging.NullHandler())


class AttackAnalysis:

    """Encapsulation of attack estimation analysis."""

    def __init__(self, AnalysedAudioFile, atkpath, config=None):
        """
        AnalysedAudioFile: the audio file object to estimate the attack of.
        atkpath: path of the ".lab" file attack times are stored in. If empty,
            a path is derived from the audio file's database directory.
        config: optional configuration object (unused here).

        Raises IOError when no path is supplied and the file is not part of a
        database.
        """
        self.logger = logging.getLogger(__name__ + '.AttackAnalysis')
        self.AnalysedAudioFile = AnalysedAudioFile
        self.attackpath = atkpath
        self.attack_start = None
        self.attack_end = None
        self.attack_size = None
        self.logattacktime = None
        # Check if analysis file already exists.
        # TODO: check if RMS has changed, if it has then new values will need
        # to be generated even if a file already exists.
        if not self.attackpath:
            if not self.AnalysedAudioFile.db_dir:
                raise IOError("Analysed Audio object must have an atk file"
                              " path or be part of a database")
            self.attackpath = os.path.join(
                self.AnalysedAudioFile.db_dir,
                "atk",
                self.AnalysedAudioFile.name +
                ".lab")
        if self.AnalysedAudioFile.force_analysis:
            pathops.delete_if_exists(self.attackpath)
            # NOTE(review): the original assigned create_attack_analysis()'s
            # (None) return value back to self.attackpath, clobbering the
            # path just computed; run the analysis without reassigning.
            self.create_attack_analysis()
        else:
            try:
                # If the file exists then get values from it.
                self.get_attack_from_file()
            except IOError:
                # Otherwise, generate new values
                self.create_attack_analysis()

    def create_attack_analysis(self, multiplier=3):
        """
        Estimate the start and end of the attack of the audio.

        Adaptive threshold method (weakest effort method) described here:
        http://recherche.ircam.fr/anasyn/peeters/ARTICLES/Peeters_2003_cuidadoaudiofeatures.pdf
        Stores values in a file at the attack path provided with the following
        format:
        attack_start attack_end

        Raises IOError if the required RMS analysis is missing and ValueError
        if the estimation fails.
        """
        # NOTE(review): a stray pdb.set_trace() debugging breakpoint was
        # removed here - it halted every analysis run.
        # Make sure RMS has been calculated
        if not self.AnalysedAudioFile.RMS:
            raise IOError("RMS analysis is required to estimate attack")
        with open(self.attackpath, 'w') as attackfile:
            # Lazy %-formatting: the original passed the path as a bare extra
            # argument with no placeholder, so it was never rendered.
            self.logger.info("Creating attack estimation file:\t\t%s",
                             os.path.relpath(self.attackpath))
            rms_contour = self.AnalysedAudioFile.RMS.get_rms_from_file()
            # Scale RMS contour to range so all calculations are performed in
            # the range 0.0 to 1.0
            # TODO: Should calculations be done in range of rms rather than
            # converting for performance increase?
            rms_contour = self.scale_to_range(rms_contour)
            # Create a grid of thresholds ranging from 0.1 to 1.0
            thresholds = np.arange(1, 11) * 0.1
            thresholds = thresholds.reshape(-1, 1)
            # Find first index of rms that is over the threshold for each
            # threshold
            threshold_inds = np.argmax(rms_contour >= thresholds, axis=1)

            # TODO: Need to make sure rms does not return to a lower threshold
            # after being > a threshold.

            # Calculate the time difference between each of the indexes
            ind_diffs = np.ediff1d(threshold_inds)
            # Find the average time between thresholds
            mean_ind_diff = np.mean(ind_diffs)
            try:
                # Attack start: first threshold whose distance to the next
                # crossing falls below the average time * the multiplier.
                a = np.argmax(ind_diffs < (mean_ind_diff * multiplier))
                attack_start_ind = threshold_inds[a]
                # Attack end: same method, looking above the average time *
                # the multiplier.
                best_end_thresh = ind_diffs > (mean_ind_diff * multiplier)
                # NOTE(review): `if not best_end_thresh:` on a numpy array of
                # more than one element raises ValueError; .any() implements
                # the intended "no candidate crossing" test.
                if not best_end_thresh.any():
                    attack_end_ind = threshold_inds[-1]
                else:
                    attack_end_ind = threshold_inds[np.argmax(best_end_thresh)]
            except ValueError as err:
                raise ValueError("Attack estimation failed: {0}".format(err))
            self.logger.info("START: {0}\nEND: {1}".format(attack_start_ind, attack_end_ind))
            # TODO: Refine position by searching for local min and max of these
            # values
            self.attack_start = self.AnalysedAudioFile.samps_to_secs(
                attack_start_ind)
            self.attack_end = self.AnalysedAudioFile.samps_to_secs(
                attack_end_ind)
            # Values are stored in the file with the following format:
            # attack_start attack_end
            attackfile.write("{0} {1}\n".format(self.attack_start,
                                                self.attack_end))

    def calc_log_attack_time(self):
        """
        Calculate the logarithm of the time duration between the time the
        signal starts to the time that the signal reaches it's stable part
        Described here:
        http://recherche.ircam.fr/anasyn/peeters/ARTICLES/Peeters_2003_cuidadoaudiofeatures.pdf

        Raises ValueError if attack times have not been calculated yet.
        """
        if not self.attack_start or not self.attack_end:
            # NOTE(review): the original message concatenated "calling"+"the"
            # without a space.
            raise ValueError("Attack times must be calculated before calling "
                             "the log attack time method")
        # NOTE(review): the original referenced self.attackend/self.attackstart,
        # attributes that are never defined (AttributeError at runtime).
        self.logattacktime = math.log10(self.attack_end - self.attack_start)

    def get_attack_from_file(self):
        """Read the attack values from a previously generated file."""
        self.logger.info("Reading attack estimation file:\t\t%s",
                         os.path.relpath(self.attackpath))
        with open(self.attackpath, 'r') as attackfile:
            for line in attackfile:
                # Split the values and convert to their correct types
                starttime, endtime = line.split()
                self.attack_start = float(starttime)
                self.attack_end = float(endtime)
                self.attack_size = self.attack_end - self.attack_start

    @staticmethod
    def scale_to_range(array, high=1.0, low=0.0):
        """Linearly rescale array so its minimum maps to low and its maximum
        maps to high."""
        mins = np.min(array)
        maxs = np.max(array)
        rng = maxs - mins
        return high - (((high - low) * (maxs - array)) / rng)
graphviz:: 34 | 35 | digraph b { 36 | subgraph cluster0 { 37 | style=filled; 38 | color=lightgrey; 39 | node [shape=record,width=.1,height=.1]; 40 | node0 [label = " | | | | | | ",width=2.5] 41 | label = "Audio\nFiles"; 42 | labeljust="l"; 43 | } 44 | 45 | subgraph cluster2 { 46 | style=filled; 47 | color=lightgrey; 48 | node [shape=record,width=.1,height=.1]; 49 | node2 [label = "RMS | F0 | Centroid | Kurtosis | etc..."] 50 | label = "Analyses"; 51 | labeljust="l"; 52 | } 53 | database[shape=rectangle, label="Audio Directory"]; 54 | HDF[shape=rectangle, label="HDF5 File"]; 55 | database -> node0; 56 | node0 -> node2; 57 | node2 -> HDF 58 | } 59 | 60 | .. raw:: latex 61 | 62 | \newpage 63 | 64 | Matching 65 | -------- 66 | 67 | Both the source and target HDF5 files are loaded to compare the values of their 68 | analyses. Each audio file's analyses are split into equally sized overlapping 69 | grains and averaged in the appropriate way to be compared to grains from the 70 | other database. 71 | The matching algorithm then calculates the grains that have the smallest 72 | overall difference, based on user defined weightings for each of the analysis 73 | types. This weighting of analyses allows for certain analyses to gain 74 | precedence over others based on user preference. 75 | The best match indexes are then saved to the output database ready for 76 | synthesis. 77 | 78 | There are currently two implementations for the matching algorithm: 79 | 80 | - Brute Force 81 | 82 | - K-d Tree Search 83 | 84 | Both will return similar results, however the K-d tree search algorithm is 85 | far more efficient when analysing large datasets so is the preferred method. 86 | 87 | .. 
graphviz:: 88 | 89 | digraph b { 90 | subgraph cluster0 { 91 | style=filled; 92 | color=lightgrey; 93 | node [shape=record,width=.1,height=.1]; 94 | node0 [label = " | | Source | Audio | Analysis | | ",width=2.5] 95 | 96 | labeljust="l"; 97 | } 98 | 99 | subgraph cluster1 { 100 | style=filled; 101 | color=lightgrey; 102 | node [shape=record,width=.1,height=.1]; 103 | node1 [label = " | | | | | | | | | Source | Analysis | Grains | | | | | | | | | ",width=2.5] 104 | label="\n\n\n\n"; 105 | labeljust="l"; 106 | } 107 | 108 | subgraph cluster2 { 109 | style=filled; 110 | color=lightgrey; 111 | node [shape=record,width=.1,height=.1]; 112 | node2 [label = "Target Audio Analysis"] 113 | labeljust="l"; 114 | } 115 | subgraph cluster3 { 116 | style=filled; 117 | color=lightgrey; 118 | node [shape=record,width=.1,height=.1]; 119 | node3 [label = " | | Target | Analysis | Grains | | ",width=2.5] 120 | label="\n\n\n\n"; 121 | labeljust="l"; 122 | } 123 | database1[shape=rectangle, label="Source HDF5 File"]; 124 | database2[shape=rectangle, label="Target HDF5 File"]; 125 | database3[shape=rectangle, label="Output HDF5 File"]; 126 | matcher[shape=rectangle, label="Matching Algorithm"]; 127 | 128 | node0:f0 -> node1:f0 129 | node0:f0 -> node1:f1 130 | node0:f0 -> node1:f2 131 | node0:f1 -> node1:f3 132 | node0:f1 -> node1:f4 133 | node0:f1 -> node1:f5 134 | node0:f2 -> node1:f6 135 | node0:f2 -> node1:f7 136 | node0:f2 -> node1:f8 137 | node0:f3 -> node1:f9 138 | node0:f3 -> node1:f10 139 | node0:f3 -> node1:f11 140 | node0:f4 -> node1:f12 141 | node0:f4 -> node1:f13 142 | node0:f4 -> node1:f14 143 | node0:f5 -> node1:f15 144 | node0:f5 -> node1:f16 145 | node0:f5 -> node1:f17 146 | node0:f6 -> node1:f18 147 | node0:f6 -> node1:f19 148 | node0:f6 -> node1:f20 149 | node2:f0 -> node3:f0 150 | node2:f0 -> node3:f1 151 | node2:f0 -> node3:f2 152 | node2:f0 -> node3:f3 153 | node2:f0 -> node3:f4 154 | node2:f0 -> node3:f5 155 | node2:f0 -> node3:f6 156 | database1 -> node0; 157 | 
database2 -> node2; 158 | node1 -> matcher 159 | node3 -> matcher 160 | matcher -> database3 161 | 162 | } 163 | 164 | .. raw:: latex 165 | 166 | \newpage 167 | 168 | Synthesis 169 | --------- 170 | 171 | The synthesis process involves loading the best match grains from the source 172 | database, performing any post-processing (such as pitch shifting and amplitude 173 | scaling) to improve the similarity of the match, then windowed overlap adding 174 | the grains to create the final output. The post-processing phase involves using 175 | the ratio difference between the source and target grain to artificially alter 176 | the source grain so that it better resembles the target. This is particularly 177 | useful when using small source databases as it improves the similarity of any 178 | match (important when best matches aren't very close to the target.) The final 179 | output is saved to the output database's audio directory. 180 | 181 | .. graphviz:: 182 | 183 | digraph b { 184 | subgraph cluster3 { 185 | style=filled; 186 | color=lightgrey; 187 | node [shape=record,width=.1,height=.1]; 188 | node3 [label = " | | Matched | Audio | Grains | ",width=2.5] 189 | } 190 | database1[shape=rectangle, label="Source Audio"]; 191 | database3[shape=rectangle, label="Output HDF5 File"]; 192 | synthesizer[shape=rectangle, label="Windowed Overlap/Add"]; 193 | output[shape=rectangle, label="Output Audio File"]; 194 | 195 | database3 -> database1[label="Get match grains"]; 196 | database1 -> node3:f0; 197 | database1 -> node3:f1; 198 | database1 -> node3:f2; 199 | database1 -> node3:f3; 200 | database1 -> node3:f4; 201 | database1 -> node3:f5; 202 | node3:f0 -> synthesizer; 203 | node3:f1 -> synthesizer; 204 | node3:f2 -> synthesizer; 205 | node3:f3 -> synthesizer; 206 | node3:f4 -> synthesizer; 207 | node3:f5 -> synthesizer; 208 | synthesizer -> output; 209 | 210 | } 211 | -------------------------------------------------------------------------------- 
/src/sppysound/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. 
The HTML pages are in $(BUILDDIR)/dirhtml." 63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/TheConcatenator.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/TheConcatenator.qhc" 93 | 94 | applehelp: 95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 96 | @echo 97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 98 | @echo "N.B. You won't be able to view it unless you put it in" \ 99 | "~/Library/Documentation/Help or install it in your application" \ 100 | "bundle." 101 | 102 | devhelp: 103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 104 | @echo 105 | @echo "Build finished." 106 | @echo "To view the help file:" 107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/TheConcatenator" 108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/TheConcatenator" 109 | @echo "# devhelp" 110 | 111 | epub: 112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 113 | @echo 114 | @echo "Build finished. 
The epub file is in $(BUILDDIR)/epub." 115 | 116 | latex: 117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 118 | @echo 119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 121 | "(use \`make latexpdf' here to do that automatically)." 122 | 123 | latexpdf: 124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 125 | @echo "Running LaTeX files through pdflatex..." 126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 128 | 129 | latexpdfja: 130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 131 | @echo "Running LaTeX files through platex and dvipdfmx..." 132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 134 | 135 | text: 136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 137 | @echo 138 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 139 | 140 | man: 141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 142 | @echo 143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 144 | 145 | texinfo: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo 148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 149 | @echo "Run \`make' in that directory to run these through makeinfo" \ 150 | "(use \`make info' here to do that automatically)." 151 | 152 | info: 153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 154 | @echo "Running Texinfo files through makeinfo..." 155 | make -C $(BUILDDIR)/texinfo info 156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 157 | 158 | gettext: 159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 160 | @echo 161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
162 | 163 | changes: 164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 165 | @echo 166 | @echo "The overview file is in $(BUILDDIR)/changes." 167 | 168 | linkcheck: 169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 170 | @echo 171 | @echo "Link check complete; look for any errors in the above output " \ 172 | "or in $(BUILDDIR)/linkcheck/output.txt." 173 | 174 | doctest: 175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 176 | @echo "Testing of doctests in the sources finished, look at the " \ 177 | "results in $(BUILDDIR)/doctest/output.txt." 178 | 179 | coverage: 180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 181 | @echo "Testing of coverage in the sources finished, look at the " \ 182 | "results in $(BUILDDIR)/coverage/python.txt." 183 | 184 | xml: 185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 186 | @echo 187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 188 | 189 | pseudoxml: 190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 191 | @echo 192 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 193 | -------------------------------------------------------------------------------- /src/sppysound/multirate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Module providing Multirate signal processing functionality. 3 | Largely based on MATLAB's Multirate signal processing toolbox with consultation 4 | of Octave m-file source code. 5 | 6 | Ref: https://github.com/mubeta06/python/blob/master/signal_processing/sp/multirate.py 7 | """ 8 | 9 | import sys 10 | import fractions 11 | import numpy 12 | from scipy import signal 13 | 14 | 15 | def downsample(s, n, phase=0): 16 | """Decrease sampling rate by integer factor n with included offset phase. 
17 | """ 18 | return s[phase::n] 19 | 20 | 21 | def upsample(s, n, phase=0): 22 | """Increase sampling rate by integer factor n with included offset phase. 23 | """ 24 | return numpy.roll(numpy.kron(s, numpy.r_[1, numpy.zeros(n-1)]), phase) 25 | 26 | 27 | def decimate(s, r, n=None, fir=False): 28 | """Decimation - decrease sampling rate by r. The decimation process filters 29 | the input data s with an order n lowpass filter and then resamples the 30 | resulting smoothed signal at a lower rate. By default, decimate employs an 31 | eighth-order lowpass Chebyshev Type I filter with a cutoff frequency of 32 | 0.8/r. It filters the input sequence in both the forward and reverse 33 | directions to remove all phase distortion, effectively doubling the filter 34 | order. If 'fir' is set to True decimate uses an order 30 FIR filter (by 35 | default otherwise n), instead of the Chebyshev IIR filter. Here decimate 36 | filters the input sequence in only one direction. This technique conserves 37 | memory and is useful for working with long sequences. 38 | """ 39 | if fir: 40 | if n is None: 41 | n = 30 42 | b = signal.firwin(n, 1.0/r) 43 | a = 1 44 | f = signal.lfilter(b, a, s) 45 | else: #iir 46 | if n is None: 47 | n = 8 48 | b, a = signal.cheby1(n, 0.05, 0.8/r) 49 | f = signal.filtfilt(b, a, s) 50 | return downsample(f, r) 51 | 52 | 53 | def interp(s, r, l=4, alpha=0.5): 54 | """Interpolation - increase sampling rate by integer factor r. Interpolation 55 | increases the original sampling rate for a sequence to a higher rate. interp 56 | performs lowpass interpolation by inserting zeros into the original sequence 57 | and then applying a special lowpass filter. l specifies the filter length 58 | and alpha the cut-off frequency. The length of the FIR lowpass interpolating 59 | filter is 2*l*r+1. The number of original sample values used for 60 | interpolation is 2*l. Ordinarily, l should be less than or equal to 10. 
def resample(s, p, q, h=None):
    """Change sampling rate by rational factor p/q. This implementation is
    based on the Octave implementation of the resample function. It designs
    the anti-aliasing filter using the window approach applying a Kaiser
    window with the beta term calculated as specified by [2].

    s: input signal; p/q: rational rate change; h: optional pre-designed
    anti-aliasing FIR filter (designed internally when None).

    NOTE(review): this is Python-2-era code - fractions.gcd was removed in
    Python 3.9 (math.gcd replaces it), and several slice indices below are
    numpy floats (from numpy.floor/ceil), which modern NumPy/Python 3 reject
    as array indices. Confirm before running on Python 3.

    Ref [1] J. G. Proakis and D. G. Manolakis,
    Digital Signal Processing: Principles, Algorithms, and Applications,
    4th ed., Prentice Hall, 2007. Chap. 6
    Ref [2] A. V. Oppenheim, R. W. Schafer and J. R. Buck,
    Discrete-time signal processing, Signal processing series,
    Prentice-Hall, 1999
    """
    # Reduce p/q to lowest terms so the filter length stays minimal.
    # NOTE(review): integer division intended here (Python 2 semantics).
    gcd = fractions.gcd(p,q)
    if gcd>1:
        p=p/gcd
        q=q/gcd

    if h is None: #design filter
        #properties of the antialiasing filter
        log10_rejection = -3.0
        stopband_cutoff_f = 1.0/(2.0 * max(p,q))
        roll_off_width = stopband_cutoff_f / 10.0

        #determine filter length
        #use empirical formula from [2] Chap 7, Eq. (7.63) p 476
        rejection_db = -20.0*log10_rejection;
        l = numpy.ceil((rejection_db-8.0) / (28.714 * roll_off_width))

        #ideal sinc filter
        t = numpy.arange(-l, l + 1)
        ideal_filter=2*p*stopband_cutoff_f*numpy.sinc(2*stopband_cutoff_f*t)

        #determine parameter of Kaiser window
        #use empirical formula from [2] Chap 7, Eq. (7.62) p 474
        beta = signal.kaiser_beta(rejection_db)

        #apodize ideal filter response
        h = numpy.kaiser(2*l+1, beta)*ideal_filter

    ls = len(s)
    lh = len(h)

    # Group delay of the (odd-length, symmetric) filter.
    l = (lh - 1)/2.0
    # Expected output length.
    ly = numpy.ceil(ls*p/float(q))

    #pre and postpad filter response
    # NOTE(review): nz_pre is a numpy float here; using it as a slice bound
    # relies on legacy NumPy behavior.
    nz_pre = numpy.floor(q - numpy.mod(l,q))
    hpad = h[-lh+nz_pre:]

    # Offset of the first fully-formed output sample.
    offset = numpy.floor((l+nz_pre)/q)
    nz_post = 0;
    # Grow the trailing zero padding until the polyphase output covers ly
    # samples past the offset.
    while numpy.ceil(((ls-1)*p + nz_pre + lh + nz_post )/q ) - offset < ly:
        nz_post += 1
    hpad = hpad[:lh + nz_pre + nz_post]

    #filtering
    xfilt = upfirdn(s, hpad, p, q)

    # Trim the filter transient and return exactly ly samples.
    return xfilt[offset-1:offset-1+ly]
(7.62) p 474 106 | beta = signal.kaiser_beta(rejection_db) 107 | 108 | #apodize ideal filter response 109 | h = numpy.kaiser(2*l+1, beta)*ideal_filter 110 | 111 | ls = len(s) 112 | lh = len(h) 113 | 114 | l = (lh - 1)/2.0 115 | ly = numpy.ceil(ls*p/float(q)) 116 | 117 | #pre and postpad filter response 118 | nz_pre = numpy.floor(q - numpy.mod(l,q)) 119 | hpad = h[-lh+nz_pre:] 120 | 121 | offset = numpy.floor((l+nz_pre)/q) 122 | nz_post = 0; 123 | while numpy.ceil(((ls-1)*p + nz_pre + lh + nz_post )/q ) - offset < ly: 124 | nz_post += 1 125 | hpad = hpad[:lh + nz_pre + nz_post] 126 | 127 | #filtering 128 | xfilt = upfirdn(s, hpad, p, q) 129 | 130 | return xfilt[offset-1:offset-1+ly] 131 | 132 | 133 | def upfirdn(s, h, p, q): 134 | """Upsample signal s by p, apply FIR filter as specified by h, and 135 | downsample by q. Using fftconvolve as opposed to lfilter as it does not seem 136 | to do a full convolution operation (and its much faster than convolve). 137 | """ 138 | return downsample(signal.fftconvolve(h, upsample(s, p)), q) 139 | 140 | def main(): 141 | """Show simple use cases for functionality provided by this module. 
Each 142 | example below attempts to mimic the examples provided by mathworks MATLAB 143 | documentation, http://www.mathworks.com/help/toolbox/signal/ 144 | """ 145 | import pylab 146 | argv = sys.argv 147 | if len(argv) != 1: 148 | print >>sys.stderr, 'usage: python -m pim.sp.multirate' 149 | sys.exit(2) 150 | 151 | #Downsample 152 | x = numpy.arange(1, 11) 153 | print 'Down Sampling %s by 3' % x 154 | print downsample(x, 3) 155 | print 'Down Sampling %s by 3 with phase offset 2' % x 156 | print downsample(x, 3, phase=2) 157 | 158 | #Upsample 159 | x = numpy.arange(1, 5) 160 | print 'Up Sampling %s by 3' % x 161 | print upsample(x, 3) 162 | print 'Up Sampling %s by 3 with phase offset 2' % x 163 | print upsample(x, 3, 2) 164 | 165 | #Decimate 166 | t = numpy.arange(0, 1, 0.00025) 167 | x = numpy.sin(2*numpy.pi*30*t) + numpy.sin(2*numpy.pi*60*t) 168 | y = decimate(x,4) 169 | pylab.figure() 170 | pylab.subplot(2, 1, 1) 171 | pylab.title('Original Signal') 172 | pylab.stem(numpy.arange(len(x[0:120])), x[0:120]) 173 | pylab.subplot(2, 1, 2) 174 | pylab.title('Decimated Signal') 175 | pylab.stem(numpy.arange(len(y[0:30])), y[0:30]) 176 | 177 | #Interp 178 | t = numpy.arange(0, 1, 0.001) 179 | x = numpy.sin(2*numpy.pi*30*t) + numpy.sin(2*numpy.pi*60*t) 180 | y = interp(x,4) 181 | pylab.figure() 182 | pylab.subplot(2, 1, 1) 183 | pylab.title('Original Signal') 184 | pylab.stem(numpy.arange(len(x[0:30])), x[0:30]) 185 | pylab.subplot(2, 1, 2) 186 | pylab.title('Interpolated Signal') 187 | pylab.stem(numpy.arange(len(y[0:120])), y[0:120]) 188 | 189 | #upfirdn 190 | L = 147.0 191 | M = 160.0 192 | N = 24.0*L 193 | h = signal.firwin(N-1, 1/M, window=('kaiser', 7.8562)) 194 | h = L*h 195 | Fs = 48000.0 196 | n = numpy.arange(0, 10239) 197 | x = numpy.sin(2*numpy.pi*1000/Fs*n) 198 | y = upfirdn(x, h, L, M) 199 | pylab.figure() 200 | pylab.stem(n[1:49]/Fs, x[1:49]) 201 | pylab.stem(n[1:45]/(Fs*L/M), y[13:57], 'r', markerfmt='ro',) 202 | pylab.xlabel('Time (sec)') 203 | 
pylab.ylabel('Signal value') 204 | 205 | #resample 206 | fs1 = 10.0 207 | t1 = numpy.arange(0, 1 + 1.0/fs1, 1.0/fs1) 208 | x = t1 209 | y = resample(x, 3, 2) 210 | t2 = numpy.arange(0,(len(y)))*2.0/(3.0*fs1) 211 | pylab.figure() 212 | pylab.plot(t1, x, '*') 213 | pylab.plot(t2, y, 'o') 214 | pylab.plot(numpy.arange(-0.5,1.5, 0.01), numpy.arange(-0.5,1.5, 0.01), ':') 215 | pylab.legend(('original','resampled')) 216 | pylab.xlabel('Time') 217 | 218 | x = numpy.hstack([numpy.arange(1,11), numpy.arange(9,0,-1)]) 219 | y = resample(x,3,2) 220 | pylab.figure() 221 | pylab.subplot(2, 1, 1) 222 | pylab.title('Edge Effects Not Noticeable') 223 | pylab.plot(numpy.arange(19)+1, x, '*') 224 | pylab.plot(numpy.arange(29)*2/3.0 + 1, y, 'o') 225 | pylab.legend(('original', 'resampled')) 226 | x = numpy.hstack([numpy.arange(10, 0, -1), numpy.arange(2,11)]) 227 | y = resample(x,3,2) 228 | pylab.subplot(2, 1, 2) 229 | pylab.plot(numpy.arange(19)+1, x, '*') 230 | pylab.plot(numpy.arange(29)*2/3.0 + 1, y, 'o') 231 | pylab.title('Edge Effects Very Noticeable') 232 | pylab.legend(('original', 'resampled')) 233 | 234 | pylab.show() 235 | return 0 236 | 237 | if __name__ == '__main__': 238 | sys.exit(main()) 239 | -------------------------------------------------------------------------------- /src/sppysound/analysis/FFTAnalysis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for creating an FFT analysis of audio. 
3 | 4 | Ref: Code adapted from: 5 | http://www.frank-zalkow.de/en/code-snippets/create-audio-spectrograms-with-python.html?ckattempt=1 6 | """ 7 | from __future__ import print_function, division 8 | import logging 9 | from fileops import pathops 10 | import numpy as np 11 | from numpy.lib import stride_tricks 12 | import os 13 | from AnalysisTools import ButterFilter 14 | from Analysis import Analysis 15 | import pdb 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | class FFTAnalysis(Analysis): 21 | """ 22 | FFT analysis descriptor class for generation of FFT spectral analysis. 23 | 24 | This descriptor calculates the spectral content for overlapping grains 25 | of an AnalysedAudioFile object. A full definition of FFT analysis can be 26 | found in the documentation. 27 | 28 | Arguments: 29 | 30 | - analysis_group: the HDF5 file group to use for the storage of the 31 | analysis. 32 | 33 | - config: The configuration module used to configure the analysis 34 | """ 35 | 36 | def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None): 37 | super(FFTAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'FFT') 38 | self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name)) 39 | # Store reference to the file to be analysed 40 | self.AnalysedAudioFile = AnalysedAudioFile 41 | 42 | if config: 43 | window_size = config.fft["window_size"] 44 | else: 45 | window_size = 2048 46 | self.analysis_group = analysis_group 47 | self.logger.info("Creating FFT analysis for {0}".format(self.AnalysedAudioFile.name)) 48 | self.create_analysis(frames, window_size=window_size) 49 | self.fft_window_count = None 50 | 51 | 52 | 53 | def create_fft_analysis(self, frames, window_size=512, window_overlap=2, 54 | window_type='hanning'): 55 | """Create a spectral analysis for overlapping frames of audio.""" 56 | if hasattr(frames, '__call__'): 57 | frames = frames() 58 | # Calculate the period of the window in hz 59 | lowest_freq = 1.0 / 
window_size 60 | # Filter frequencies lower than the period of the window 61 | # filter = ButterFilter() 62 | # filter.design_butter(lowest_freq, self.AnalysedAudioFile.samplerate) 63 | 64 | # frames = filter.filter_butter(frames) 65 | stft = self.stft(frames, window_size, overlapFac=1/window_overlap) 66 | frame_times = self.calc_fft_frame_times( 67 | stft, 68 | frames, 69 | self.AnalysedAudioFile.samplerate 70 | ) 71 | return (stft, frame_times) 72 | 73 | def get_analysis_grains(self, start, end): 74 | """ 75 | Retrieve analysis frames for period specified in start and end times. 76 | arrays of start and end time pairs will produce an array of equivelant 77 | size containing frames for these times. 78 | """ 79 | times = self.analysis_group["FFT"]["times"][:] 80 | start = start / 1000 81 | end = end / 1000 82 | vtimes = times.reshape(-1, 1) 83 | 84 | selection = np.transpose((vtimes >= start) & (vtimes <= end)) 85 | 86 | np.set_printoptions(threshold=np.nan) 87 | 88 | grain_data = [] 89 | for grain in selection: 90 | grain_data.append((self.analysis_group["FFT"]["frames"][grain, :], times[grain])) 91 | 92 | return grain_data 93 | 94 | def hdf5_dataset_formatter(self, *args, **kwargs): 95 | ''' 96 | Formats the output from the analysis method to save to the HDF5 file. 97 | 98 | Places data and attributes in 2 dictionaries to be stored in the HDF5 99 | file. 
100 | ''' 101 | frames, frame_times = self.create_fft_analysis(*args, **kwargs) 102 | return ( 103 | { 104 | 'frames': frames, 105 | 'times': frame_times 106 | }, 107 | { 108 | 'win_size': kwargs.pop('window_size', 512), 109 | 'overlap': kwargs.pop('overlap', 2), 110 | 'window_type': kwargs.pop('window_type', 'hanning') 111 | } 112 | ) 113 | 114 | @staticmethod 115 | def stft(sig, frameSize, overlapFac=0.5, window=np.hanning): 116 | """Short time fourier transform of audio signal.""" 117 | win = window(frameSize) 118 | hopSize = int(frameSize - np.floor(overlapFac * frameSize)) 119 | 120 | # zeros at beginning (thus center of 1st window should be for sample nr. 0) 121 | samples = np.append(np.zeros(np.floor(frameSize/2).astype(int)), sig) 122 | # cols for windowing 123 | 124 | cols = np.ceil((len(samples) - frameSize) / float(hopSize)) + 1 125 | # zeros at end (thus samples can be fully covered by frames) 126 | samples = np.append(samples, np.zeros(frameSize)) 127 | 128 | frames = stride_tricks.as_strided( 129 | samples, 130 | shape=(cols, frameSize), 131 | strides=(samples.strides[0]*hopSize, samples.strides[0]) 132 | ).copy() 133 | 134 | frames *= win 135 | 136 | return np.fft.rfft(frames) 137 | 138 | ''' 139 | def logscale_spec(self, spec, sr=44100, factor=20.): 140 | """Scale frequency axis logarithmically.""" 141 | # Get a count of times and frequencies from fft frames 142 | timebins, freqbins = np.shape(spec) 143 | 144 | # Create array from 0 to 1 with values for each frequency bin. 145 | # Scale by a power of the factor provided. 146 | scale = np.linspace(0, 1, freqbins) ** factor 147 | # Scale to the number of frequency bins 148 | scale *= (freqbins-1)/max(scale) 149 | # Round to the nearest whole number and reduce to only unique numbers. 
150 | scale = np.unique(np.round(scale)) 151 | 152 | # Create a new complex number array with the number of time frames and 153 | # the new number of frequency bins 154 | newspec = np.complex128(np.zeros([timebins, len(scale)])) 155 | # For each of the frequency bins 156 | for i in range(0, len(scale)): 157 | # If it is the highest frequency bin... 158 | if i == len(scale)-1: 159 | # Sum all frequency bins from the scale index upwards 160 | newspec[:, i] = np.sum(spec[:, scale[i]:], axis=1) 161 | else: 162 | # Sum all frequency bins from the current scale index up to the 163 | # next scale index 164 | newspec[:, i] = np.sum(spec[:, scale[i]:scale[i+1]], axis=1) 165 | 166 | # List the center frequency of bins 167 | allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1]) 168 | freqs = [] 169 | # For each of the frequency bins 170 | for i in range(0, len(scale)): 171 | if i == len(scale)-1: 172 | freqs += [np.mean(allfreqs[scale[i]:])] 173 | else: 174 | freqs += [np.mean(allfreqs[scale[i]:scale[i+1]])] 175 | 176 | return newspec, freqs 177 | 178 | def plotstft(self, samples, fs, binsize=2**10, plotpath=None, 179 | colormap="jet"): 180 | """Plot spectrogram.""" 181 | # Get all fft frames 182 | s = self.analysis['data'][:] 183 | 184 | sshow, freq = self.logscale_spec(s, factor=1.0, sr=fs) 185 | 186 | # Amplitude to decibel 187 | ims = 20.*np.log10(np.abs(sshow)/10e-6) 188 | 189 | # Get the dimensions of the fft 190 | timebins, freqbins = np.shape(ims) 191 | 192 | plt.figure(figsize=(15, 7.5)) 193 | plt.imshow(np.transpose(ims), origin="lower", aspect="auto", 194 | cmap=colormap) 195 | # Add a colour bar to the side of the spectrogram. 
196 | plt.colorbar() 197 | 198 | # Set spectrogram labels 199 | plt.xlabel("time (s)") 200 | plt.ylabel("frequency (hz)") 201 | plt.xlim([0, timebins-1]) 202 | plt.ylim([0, freqbins]) 203 | 204 | # Create an array of 5 values from 0 to the number of times 205 | xlocs = np.float32(np.linspace(0, timebins-1, 5)) 206 | # Display time values at 5 points along the x axis of the graph 207 | plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs*len(samples)/timebins)+(0.5*binsize))/fs]) 208 | # Display frequency values at 10 points along the y axis of the graph 209 | ylocs = np.int16(np.round(np.linspace(0, freqbins-1, 10))) 210 | plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs]) 211 | 212 | if plotpath: 213 | plt.savefig(plotpath, bbox_inches="tight") 214 | else: 215 | plt.show() 216 | 217 | plt.clf() 218 | ''' 219 | 220 | def calc_fft_frame_times(self, fftframes, sample_frames, samplerate): 221 | """Calculate times for frames using sample size and samplerate.""" 222 | 223 | if hasattr(sample_frames, '__call__'): 224 | sample_frames = sample_frames() 225 | # Get number of frames for time and frequency 226 | timebins, freqbins = np.shape(fftframes) 227 | # Create array ranging from 0 to number of time frames 228 | scale = np.arange(timebins+1) 229 | # divide the number of samples by the total number of frames, then 230 | # multiply by the frame numbers. 231 | fft_times = (sample_frames.shape[0]/timebins) * scale[:-1] 232 | # Divide by the samplerate to give times in seconds 233 | fft_times = fft_times / samplerate 234 | return fft_times 235 | 236 | 237 | --------------------------------------------------------------------------------