├── requirements.txt ├── examples ├── run_tests ├── config.py ├── concatenator ├── src ├── tests │ ├── run_f0_tests.sh │ ├── run_tests.sh │ └── config.py └── sppysound │ ├── synthesis │ ├── __init__.py │ ├── wavegen.py │ ├── timestretch.py │ └── synthesis_tools.py │ ├── Examples │ ├── ExampleTarget │ │ └── ElectricGuitarSample-out.wav │ ├── ExampleFiles │ │ ├── Xylophone.rosewood.roll.ff.B4.stereo.aif │ │ ├── Xylophone.rosewood.roll.ff.B5.stereo.aif │ │ ├── Xylophone.rosewood.roll.ff.B6.stereo.aif │ │ ├── Xylophone.rosewood.roll.ff.B7.stereo.aif │ │ ├── Xylophone.rosewood.roll.ff.Ab6.stereo.aif │ │ ├── Xylophone.rosewood.roll.ff.Ab7.stereo.aif │ │ ├── Xylophone.rosewood.roll.ff.Bb4.stereo.aif │ │ └── Xylophone.rosewood.roll.ff.Bb5.stereo.aif │ ├── README.txt │ ├── analysis_config.py │ ├── matching_config.py │ ├── synthesis_config.py │ ├── Database analysis example.ipynb │ ├── Database Matching Example.ipynb │ └── Match Synthesis Example.ipynb │ ├── analysis │ ├── audiograph.py │ ├── __init__.py │ ├── AnalysisTools.py │ ├── ZeroXAnalysis.py │ ├── PeakAnalysis.py │ ├── SpectralCentroidAnalysis.py │ ├── F0HarmRatioAnalysis.py │ ├── CentroidAnalysis.py │ ├── VarianceAnalysis.py │ ├── SpectralCrestFactorAnalysis.py │ ├── SpectralFlatnessAnalysis.py │ ├── SpectralFluxAnalysis.py │ ├── SkewnessAnalysis.py │ ├── KurtosisAnalysis.py │ ├── RMSAnalysis.py │ ├── SpectralSpreadAnalysis.py │ ├── Analysis.py │ ├── AttackAnalysis.py │ └── FFTAnalysis.py │ ├── __init__.py │ ├── commands.txt │ ├── full_run_MBair.sh │ ├── full_run_MBpro.sh │ ├── docs │ ├── examples.rst │ ├── refs.bib │ ├── index.rst │ ├── api.rst │ ├── installation.rst │ ├── DatabaseAnalysisExample.ipynb │ ├── analysis_config.py │ ├── matching_config.py │ ├── synthesis_config.py │ ├── DatabaseMatchingExample.ipynb │ ├── MatchSynthesisExample.ipynb │ ├── overview.rst │ └── Makefile │ ├── pitch_shift.py │ ├── helper.py │ ├── synthesize_output.py │ ├── create_database.py │ ├── run_matching.py │ ├── config.py │ └── 
multirate.py ├── .gitignore ├── install.sh ├── setup.py └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples: -------------------------------------------------------------------------------- 1 | ./src/sppysound/Examples -------------------------------------------------------------------------------- /run_tests: -------------------------------------------------------------------------------- 1 | ./src/tests/run_tests.sh -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | ./src/sppysound/config.py -------------------------------------------------------------------------------- /concatenator: -------------------------------------------------------------------------------- 1 | ./src/sppysound/concatenator.py -------------------------------------------------------------------------------- /src/tests/run_f0_tests.sh: -------------------------------------------------------------------------------- 1 | python audiofile_tests.py -v F0AnalysisTests.test_Generatef0 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.wav 3 | *.aiff 4 | *.aif 5 | Session.vim 6 | pip-selfcheck.json 7 | .DS_Store 8 | -------------------------------------------------------------------------------- /src/sppysound/synthesis/__init__.py: -------------------------------------------------------------------------------- 1 | import timestretch 2 | import wavegen 3 | __all__ = ["timestretch", "wavegen"] 4 | -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleTarget/ElectricGuitarSample-out.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleTarget/ElectricGuitarSample-out.wav -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B4.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B4.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B5.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B5.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B6.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B6.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B7.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.B7.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Ab6.stereo.aif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Ab6.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Ab7.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Ab7.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Bb4.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Bb4.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Bb5.stereo.aif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pezz89/PySoundConcat/HEAD/src/sppysound/Examples/ExampleFiles/Xylophone.rosewood.roll.ff.Bb5.stereo.aif -------------------------------------------------------------------------------- /src/sppysound/Examples/README.txt: -------------------------------------------------------------------------------- 1 | This folder contains three examples of the pysound API usage. They can be run 2 | using the Jupyter notebook unix command. See the documentation for instructions 3 | on running these scripts. 
def plot_audio(audio_array):
    """
    Plots audio to a graph
    """
    # Draw the waveform first, then attach the axis labels before showing.
    plt.plot(audio_array)
    for labeller, text in ((plt.xlabel, "Time (samples)"),
                           (plt.ylabel, "sample value")):
        labeller(text)
    plt.show()
./create_database.py ~/AudioDatabases/Vocal_examples ~/AnalysedAudioDatabases/Vocal_examples --reanalyse 2 | ./create_database.py ~/AudioDatabases/Viola ~/AnalysedAudioDatabases/Viola3 --reanalyse 3 | ./run_matching.py ~/AnalysedAudioDatabases/Viola3 ~/AnalysedAudioDatabases/Vocal_examples ~/OutputDatabases/TestOutput --rematch 4 | ./synthesize_output.py ~/AnalysedAudioDatabases/Viola3 ~/OutputDatabases/TestOutput ~/AnalysedAudioDatabases/Vocal_examples 5 | -------------------------------------------------------------------------------- /src/sppysound/full_run_MBair.sh: -------------------------------------------------------------------------------- 1 | set -euo pipefail 2 | ./create_database.py ~/AudioDatabases/Vocal_examples ~/AnalysedAudioDatabases/Vocal_examples --reanalyse 3 | ./create_database.py ~/AudioDatabases/Viola ~/AnalysedAudioDatabases/Viola3 --reanalyse 4 | ./run_matching.py ~/AnalysedAudioDatabases/Viola3 ~/AnalysedAudioDatabases/Vocal_examples ~/OutputDatabases/TestOutput --rematch 5 | ./synthesize_output.py ~/AnalysedAudioDatabases/Viola3 ~/OutputDatabases/TestOutput ~/AnalysedAudioDatabases/Vocal_examples 6 | -------------------------------------------------------------------------------- /src/tests/run_tests.sh: -------------------------------------------------------------------------------- 1 | SOURCE="${BASH_SOURCE[0]}" 2 | while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink 3 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 4 | SOURCE="$(readlink "$SOURCE")" 5 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located 6 | done 7 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 8 | cd "$DIR" 9 | python -m unittest -v audiofile_tests 10 | -------------------------------------------------------------------------------- /src/sppysound/full_run_MBpro.sh: 
-------------------------------------------------------------------------------- 1 | set -euo pipefail 2 | ./create_database.py /Volumes/Storage/AudioDatabases/Viola ~/AllDatabases/AnalysedAudioDatabases/Viola3 --reanalyse 3 | ./create_database.py /Volumes/Storage/AudioDatabases/Vocal_examples ~/AllDatabases/AnalysedAudioDatabases/Vocal_examples --reanalyse 4 | ./run_matching.py ~/AllDatabases/AnalysedAudioDatabases/Viola3 ~/AllDatabases/AnalysedAudioDatabases/Vocal_examples ~/AllDatabases/OutputDatabases/TestOutput --rematch 5 | ./synthesize_output.py ~/AllDatabases/AnalysedAudioDatabases/Viola3 ~/AllDatabases/OutputDatabases/TestOutput ~/AllDatabases/AnalysedAudioDatabases/Vocal_examples 6 | -------------------------------------------------------------------------------- /src/sppysound/Examples/matching_config.py: -------------------------------------------------------------------------------- 1 | rms = { 2 | "window_size": 100, 3 | "overlap": 2, 4 | } 5 | 6 | analysis_dict = { 7 | "f0": "log2_median", 8 | "rms": "mean" 9 | } 10 | 11 | matcher_weightings = { 12 | "f0" : 1., 13 | "rms": 1. 14 | } 15 | 16 | analysis = { 17 | "reanalyse": False 18 | } 19 | 20 | matcher = { 21 | "rematch": False, 22 | "grain_size": 100, 23 | "overlap": 2, 24 | # Defines the number of matches to keep for synthesis. 
25 | "match_quantity": 20 26 | } 27 | 28 | output_file = { 29 | "samplerate": 44100, 30 | "format": 131075, 31 | "channels": 1 32 | } 33 | -------------------------------------------------------------------------------- /src/sppysound/Examples/synthesis_config.py: -------------------------------------------------------------------------------- 1 | rms = { 2 | "window_size": 100, 3 | "overlap": 2, 4 | } 5 | 6 | analysis_dict = { 7 | "f0": "log2_median", 8 | "rms": "mean" 9 | } 10 | 11 | analysis = { 12 | "reanalyse": False 13 | } 14 | 15 | output_file = { 16 | "samplerate": 44100, 17 | "format": 131075, 18 | "channels": 1 19 | } 20 | 21 | synthesizer = { 22 | "enforce_rms": True, 23 | "enf_rms_ratio_limit": 5., 24 | "enforce_f0": True, 25 | "enf_f0_ratio_limit": 10., 26 | "grain_size": 100, 27 | "overlap": 2, 28 | "normalize" : True, 29 | # Defines the number of potential grains to choose from matches when 30 | # synthesizing output. 31 | "match_quantity": 20 32 | } 33 | -------------------------------------------------------------------------------- /src/sppysound/docs/examples.rst: -------------------------------------------------------------------------------- 1 | API Usage Examples 2 | ================== 3 | 4 | This section aims to give use case examples for the API. This shows how the 5 | code can be used to create a python script similar to that of the concatenator 6 | script. 7 | 8 | WARNING: As examples use the same files, only one notebook can be run at a 9 | time. Make sure one notebook has been closed and halted before starting the 10 | next. 11 | 12 | This section contains three examples of how to use the API for creating and 13 | comparing audio databases. Examples have been created using Jupyter interactive 14 | notebooks that can be found in the project folder and used to step through the 15 | code line by line interactively to see results. 16 | 17 | .. 
import os
from setuptools import setup, find_packages


def read(fname):
    """
    Utility function to read the README file.

    Used for the long_description. It's nice, because now 1) we have a top level
    README file and 2) it's easier to type in the README file than to put a raw
    string in below ...
    """
    # Fix: the original left the file handle open; a context manager
    # guarantees the file is closed even if .read() raises.
    with open(os.path.join(os.path.dirname(__file__), fname)) as f:
        return f.read()


setup(
    name="sppysound",
    version="1.0",
    author="Sam Perry",
    author_email="u1265119@unimail.hud.ac.uk",
    description=("A library for audio analysis and synthesis."),
    license="GPL",
    keywords="synthesis audio",
    url="https://github.com/Pezz89/pysound",
    package_dir={'': 'src'},
    packages=find_packages(where='src'),
    setup_requires=["numpy"],  # Just numpy here
    # Fix: install_requires expects an iterable of requirement strings, not
    # one newline-joined blob; splitlines() is backward compatible (the
    # project's requirements.txt is currently empty, yielding []).
    install_requires=read('requirements.txt').splitlines()
)
* np.log2(pitch) 25 | p_shift_args = ["sox", input_filepath, output_filepath, "pitch", str(cents)] 26 | 27 | p = subprocess.Popen(p_shift_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 28 | (output, err) = p.communicate() 29 | 30 | with AudioFile(output_filepath, mode='r') as shift_output: 31 | # Read result 32 | result = shift_output.read_grain() 33 | return result 34 | -------------------------------------------------------------------------------- /src/sppysound/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | from Analysis import Analysis 2 | from RMSAnalysis import RMSAnalysis 3 | from ZeroXAnalysis import ZeroXAnalysis 4 | from FFTAnalysis import FFTAnalysis 5 | from SpectralCentroidAnalysis import SpectralCentroidAnalysis 6 | from SpectralSpreadAnalysis import SpectralSpreadAnalysis 7 | from SpectralFluxAnalysis import SpectralFluxAnalysis 8 | from SpectralCrestFactorAnalysis import SpectralCrestFactorAnalysis 9 | from SpectralFlatnessAnalysis import SpectralFlatnessAnalysis 10 | from PeakAnalysis import PeakAnalysis 11 | from F0Analysis import F0Analysis 12 | from CentroidAnalysis import CentroidAnalysis 13 | from VarianceAnalysis import VarianceAnalysis 14 | from KurtosisAnalysis import KurtosisAnalysis 15 | from SkewnessAnalysis import SkewnessAnalysis 16 | from F0HarmRatioAnalysis import F0HarmRatioAnalysis 17 | import AnalysisTools 18 | __all__ = [ 19 | "Analysis", 20 | "ZeroXAnalysis", 21 | "RMSAnalysis", 22 | "AnalysisTools", 23 | "FFTAnalysis", 24 | "SpectralCentroidAnalysis", 25 | "SpectralSpreadAnalysis", 26 | "SpectralFluxAnalysis", 27 | "SpectralCrestFactorAnalysis", 28 | "SpectralFlatnessAnalysis", 29 | "F0Analysis", 30 | "PeakAnalysis", 31 | "CentroidAnalysis", 32 | "VarianceAnalysis", 33 | "KurtosisAnalysis", 34 | "SkewnessAnalysis", 35 | "F0HarmRatioAnalysis" 36 | ] 37 | -------------------------------------------------------------------------------- 
def gen_wave(
    size,
    freq,
    wave_type,
    phase = 0.0,
    amplitude = 1.0,
    samplerate = 44100
):
    """
    Generates a numpy array of given size (seconds) containing a wave of given
    type and frequency at the samplerate specified.

    Args:
        size: Duration of the output in seconds.
        freq: Frequency of the wave in Hz.
        wave_type: One of "sine", "square", "tri", "saw" or "rev_saw".
        phase: Initial phase offset in radians. Fix: this parameter was
            previously accepted but silently ignored; it now offsets the
            sine (and, via hard clipping, the square) wave. The default of
            0.0 preserves the behaviour existing callers see.
        amplitude: Peak amplitude of the generated wave.
        samplerate: Number of samples generated per second.

    Raises:
        KeyError: If wave_type is not one of the supported names.

    Note: Waves generated are raw and not anti-aliased. For audio signals
    consider using other algorithms.
    """

    def sine():
        samples = np.arange(0, size, 1. / samplerate)
        # Apply the requested phase offset (previously ignored).
        return amplitude * np.sin(2.0*np.pi*freq*samples + phase)

    def square():
        # Hard-clipped sine; inherits the phase offset through sine().
        return amplitude * np.sign(sine())

    def triangle():
        samples = np.arange(0, size, 1. / samplerate)
        return amplitude - (2 * np.abs(samples * (2 * freq) % (2*amplitude) - amplitude))

    def sawtooth():
        samples = np.arange(0, size, 1. / samplerate)
        return amplitude - (2 * np.abs((samples * freq) % amplitude - amplitude))

    def reverse_saw():
        samples = np.arange(0, size, 1. / samplerate)
        return amplitude - (2 * np.abs(((samples * freq) % amplitude)))

    # Dispatch table mapping wave names to their generators.
    options = {
        "sine" : sine,
        "square" : square,
        "tri" : triangle,
        "saw" : sawtooth,
        "rev_saw" : reverse_saw
    }

    return options[wave_type]()
class ButterFilter:
    """Designs and applies a Butterworth filter via scipy.signal."""

    def __init__(self, *args, **kwargs):
        # No coefficients exist until design_butter() has been called.
        self.filtervalues = None
        self.logger = logging.getLogger(__name__ + '.ButterFilter')

    def design_butter(self, cutoff, fs, filtertype='high', order=5):
        """
        Generate a butterworth filter of type and order specified.

        Calculates the cutoff frequency based on the samplerate.
        """
        # Ref: This code has been adapted from:
        # http://stackoverflow.com/questions/25191620/creating-lowpass-filter-in-scipy-understanding-methods-and-units
        nyquist = 0.5 * fs
        # scipy.signal.butter wants the cutoff as a fraction of nyquist.
        numerator, denominator = butter(
            order, cutoff / nyquist, btype=filtertype, analog=False)
        self.filtervalues = numerator, denominator

    def filter_butter(self, data):
        """Filter audio using a butterworth filter."""
        numerator, denominator = self.filtervalues
        # Apply the coefficients produced by design_butter().
        return lfilter(numerator, denominator, data)
This 18 | form of synthesis combines the ability to window and join small segments of 19 | sound to create a new sound (a process known as granular synthesis), with audio 20 | analysis techniques capable of describing a sound in order to differentiate it 21 | from others. By analysing small segments in a target sound for their perceptual 22 | characteristics (such as pitch, timbre and loudness), it is then possible to 23 | compare these segments to a collection of source sounds to find perceptually 24 | similar segments. From this, the most perceptually similar matches can be taken 25 | and joined using granular synthesis techniques in order to achieve the final 26 | result. 27 | 28 | Full documentation can be found at: http://pezz89.github.io/PySoundConcat/index.html 29 | -------------------------------------------------------------------------------- /src/sppysound/docs/index.rst: -------------------------------------------------------------------------------- 1 | .. The Concatenator documentation master file, created by 2 | sphinx-quickstart on Tue Mar 15 18:41:42 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Concatenator's documentation! 7 | ======================================== 8 | 9 | A python script for generating granular synthesis driven representations of 10 | audio files based on audio database analysis. 11 | 12 | .. image:: Concatenator_diagram_scaled_small.jpg 13 | :align: center 14 | :scale: 100% 15 | 16 | This script was developed to explore the creative potential of combining 17 | short-time audio analyses with granular synthesis, to synthesize perceptually 18 | related representations of target audio files. Through use of analysed 19 | databases of varying sizes, an output can be generated that represents a mix of 20 | the spectral and temporal features of the original target sound and the corpus 21 | of source sounds. 
try:
    # Fix: the bare `collections.MutableSet` alias was deprecated in 3.3 and
    # removed in Python 3.10; collections.abc is the correct home.
    from collections.abc import MutableSet as _MutableSet
except ImportError:  # Python 2 fallback
    from collections import MutableSet as _MutableSet


class OrderedSet(_MutableSet):
    '''
    Defines a set object that remembers the order that items are added to it.

    Taken from: http://code.activestate.com/recipes/576694/

    Internally, a doubly linked list of [key, prev, next] cells threaded
    through a sentinel gives O(1) add/discard/pop, while self.map gives O(1)
    membership tests.
    '''

    def __init__(self, iterable=None):
        """Create an empty ordered set, optionally populated from iterable."""
        self.end = end = []
        end += [None, end, end]  # sentinel node for doubly linked list
        self.map = {}  # key --> [key, prev, next]
        if iterable is not None:
            self |= iterable

    def __len__(self):
        return len(self.map)

    def __contains__(self, key):
        return key in self.map

    def add(self, key):
        """Append key at the end of the set if not already present."""
        if key not in self.map:
            end = self.end
            curr = end[1]
            curr[2] = end[1] = self.map[key] = [key, curr, end]

    def discard(self, key):
        """Remove key if present, unlinking its cell from the list."""
        if key in self.map:
            key, prev, next = self.map.pop(key)
            prev[2] = next
            next[1] = prev

    def __iter__(self):
        # Walk the linked list forwards from the sentinel.
        end = self.end
        curr = end[2]
        while curr is not end:
            yield curr[0]
            curr = curr[2]

    def __reversed__(self):
        # Walk the linked list backwards from the sentinel.
        end = self.end
        curr = end[1]
        while curr is not end:
            yield curr[0]
            curr = curr[1]

    def pop(self, last=True):
        """Remove and return the most (or least) recently added element.

        Raises:
            KeyError: If the set is empty.
        """
        if not self:
            raise KeyError('set is empty')
        key = self.end[1][0] if last else self.end[2][0]
        self.discard(key)
        return key

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        # Order matters only when comparing against another OrderedSet.
        if isinstance(other, OrderedSet):
            return len(self) == len(other) and list(self) == list(other)
        return set(self) == set(other)
"overlap": 16 20 | } 21 | 22 | skewness = { 23 | "window_size": 130, 24 | "overlap": 16 25 | } 26 | 27 | fft = { 28 | "window_size": 65536 29 | } 30 | 31 | 32 | matcher_weightings = { 33 | "f0" : 1., 34 | "spccntr" : 1., 35 | "spcsprd" : 1., 36 | "spcflux" : 1., 37 | "spccf" : 1., 38 | "spcflatness": 1., 39 | "zerox" : 1., 40 | "rms" : 1., 41 | "peak": 1., 42 | "centroid": 1., 43 | "kurtosis": 1., 44 | "skewness": 1., 45 | "variance": 3., 46 | "harm_ratio": 1. 47 | } 48 | 49 | analysis_dict = { 50 | "f0": "log2_median", 51 | "rms": "mean", 52 | "zerox": "mean", 53 | "spccntr": "mean", 54 | "spcsprd": "mean", 55 | "spcflux": "mean", 56 | "spccf": "mean", 57 | "spcflatness": "mean", 58 | "peak": "mean", 59 | "centroid": "mean", 60 | "kurtosis": "mean", 61 | "skewness": "mean", 62 | "variance": "mean", 63 | "harm_ratio": "mean" 64 | } 65 | 66 | analysis = { 67 | "reanalyse": False 68 | } 69 | 70 | matcher = { 71 | "rematch": True, 72 | "grain_size": 130, 73 | "overlap": 16, 74 | # Defines the number of matches to keep for synthesis. Note that this must 75 | # also be specified in the synthesis config 76 | "match_quantity": 20 77 | } 78 | 79 | synthesizer = { 80 | "enforce_rms": True, 81 | "enf_rms_ratio_limit": 5., 82 | "enforce_f0": True, 83 | "enf_f0_ratio_limit": 10., 84 | "grain_size": 130, 85 | "overlap": 16, 86 | "normalize" : True, 87 | # Defines the number of potential grains to choose from matches when 88 | # synthesizing output. 
#!/usr/bin/env python

"""Command line interface for synthesizing output audio from matched databases."""

import argparse
import audiofile
import logging
from fileops import loggerops
import pdb
import os
import __builtin__
import config
from database import AudioDatabase, Synthesizer

filename = os.path.splitext(__file__)[0]
logger = loggerops.create_logger(log_filename='./{0}.log'.format(filename))

def main():
    """Parse arguments then synthesize output audio from database matches."""
    logger.info('Started')
    parser = argparse.ArgumentParser(
        description='Synthesize audio in the output database (argument 2) '
        'using grains from the source database (argument 1), guided by '
        'matches to the target database (argument 3).'
    )
    parser.add_argument(
        'source',
        type=str,
        help='source database directory'
    )
    parser.add_argument(
        'output',
        type=str,
        help='output database directory'
    )
    # NOTE: required positional; the original default=None was ignored by
    # argparse and has been removed.
    parser.add_argument(
        'target',
        type=str,
        help='target database directory'
    )
    args = parser.parse_args()

    # Load database of samples to be used for output synthesis.
    source_db = AudioDatabase(
        args.source,
        config=config,
        analysis_list={"f0", "rms", "peak"}
    )
    # Create/load a pre-existing database.
    source_db.load_database(reanalyse=False)

    # Load database used to generate matches to source database. This is
    # used when enforcing analyses such as RMS and F0. (Original grains are
    # needed to calculate the ratio to alter the synthesized grain by.)
    target_db = AudioDatabase(
        args.target,
        config=config,
        analysis_list={"f0", "rms", "peak"}
    )
    # Create/load a pre-existing database.
    target_db.load_database(reanalyse=False)

    # Database that receives the synthesized grains.
    output_db = AudioDatabase(
        args.output,
        config=config
    )
    # Create/load a pre-existing database.
    output_db.load_database(reanalyse=False)

    synthesizer = Synthesizer(source_db, output_db, target_db=target_db, config=config)
    synthesizer.synthesize(
        grain_size=config.synthesizer["grain_size"],
        overlap=config.synthesizer["overlap"]
    )

if __name__ == "__main__":
    main()
autoclass:: database.Synthesizer 36 | :members: 37 | 38 | --------------------------- 39 | Analysis Classes 40 | --------------------------- 41 | .. autoclass:: analysis.Analysis 42 | :members: 43 | Centroid Analysis Class 44 | ~~~~~~~~~~~~~~~~~~~~~~~ 45 | 46 | .. autoclass:: analysis.CentroidAnalysis 47 | :members: 48 | 49 | F0 Analysis Class 50 | ~~~~~~~~~~~~~~~~~~~~~~~ 51 | 52 | .. autoclass:: analysis.F0Analysis 53 | :members: 54 | 55 | FFT Analysis Class 56 | ~~~~~~~~~~~~~~~~~~~~~~~ 57 | 58 | .. autoclass:: analysis.FFTAnalysis 59 | :members: 60 | 61 | Harmonic Ratio Analysis Class 62 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 63 | 64 | .. autoclass:: analysis.F0HarmRatioAnalysis 65 | :members: 66 | 67 | Kurtosis Analysis Class 68 | ~~~~~~~~~~~~~~~~~~~~~~~ 69 | 70 | .. autoclass:: analysis.KurtosisAnalysis 71 | :members: 72 | 73 | Peak Analysis Class 74 | ~~~~~~~~~~~~~~~~~~~~~~~ 75 | 76 | .. autoclass:: analysis.PeakAnalysis 77 | :members: 78 | 79 | RMS Analysis Class 80 | ~~~~~~~~~~~~~~~~~~~~~~~ 81 | 82 | .. autoclass:: analysis.RMSAnalysis 83 | :members: 84 | 85 | Spectral Centroid Analysis Class 86 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 87 | 88 | .. autoclass:: analysis.SpectralCentroidAnalysis 89 | :members: 90 | 91 | Spectral Crest Factor Analysis Class 92 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 93 | 94 | .. autoclass:: analysis.SpectralCrestFactorAnalysis 95 | :members: 96 | 97 | Spectral Flatness Analysis Class 98 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 99 | 100 | .. autoclass:: analysis.SpectralFlatnessAnalysis 101 | :members: 102 | 103 | Spectral Flux Analysis Class 104 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 105 | 106 | .. autoclass:: analysis.SpectralFluxAnalysis 107 | :members: 108 | 109 | Spectral Spread Analysis Class 110 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 111 | 112 | .. autoclass:: analysis.SpectralSpreadAnalysis 113 | :members: 114 | 115 | Variance Analysis Class 116 | ~~~~~~~~~~~~~~~~~~~~~~~ 117 | 118 | .. 
def x_corr_time_lag(grain1, grain2):
    """
    Calculate the the time lag between two grains where grain2 is at maximum
    similarity to grain1
    """
    # The peak of the full cross-correlation marks the offset of maximum
    # similarity; express it relative to grain1's length.
    xcorr = np.correlate(grain1, grain2, mode="full")
    return np.argmax(xcorr) - grain1.size
def granulate_audio(
    input_audio,
    output_audio,
    stretch=1.5,
    window_size=1000,
    offset = 500,
    overlap = 250
):
    """
    Time-stretches audio using SOLA granulation

    Arguments:
    - input_audio: audio file object exposing read_frames() and seek().
    - output_audio: destination audio file object. NOTE(review): currently
      unused - nothing is ever written; see note below.
    - stretch: time-stretch ratio (presumably >1 lengthens the audio).
    - window_size: grain length in samples.
    - offset: hop between grain start points in samples.
    - overlap: intended crossfade length in samples (currently unused).

    NOTE(review): this implementation is incomplete - it only reads the
    input into overlapping grains and prints progress; the SOLA overlap-add
    and output writing are not implemented (time_shift is computed but
    never used).
    """
    input_grains = np.array([])
    offset_count = 0
    #Read audio into grains of set size with set offset
    while True:
        #Read as many full windows of audio as possible
        try:
            read_frames = input_audio.read_frames(window_size)
            offset_count += offset
            input_audio.seek(offset_count, 0)
            if not input_grains.size:
                # First grain: start the 2-D (grain, sample) array.
                input_grains = np.array([read_frames])
            else:
                input_grains = np.append(input_grains, [read_frames], axis = 0)
        except RuntimeError:
            # read_frames raises once a full window can no longer be read.
            break
    i = 0
    while i < input_grains.shape[0] - 1:
        print i
        i += 1
    print input_grains.shape


    #find the best overlap point for the x-fade by calculating the cross
    #correlation

    time_shift = int(round(offset * stretch))
16 | 17 | This descriptor calculates the zero-crossing rate for overlapping grains of 18 | an AnalysedAudioFile object. A full definition of zero-crossing analysis 19 | can be found in the documentation. 20 | 21 | Arguments: 22 | 23 | - analysis_group: the HDF5 file group to use for the storage of the 24 | analysis. 25 | 26 | - config: The configuration module used to configure the analysis 27 | """ 28 | 29 | def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None): 30 | super(ZeroXAnalysis, self).__init__(AnalysedAudioFile,frames, analysis_group, 'ZeroCrossing') 31 | self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name)) 32 | self.analysis_group = analysis_group 33 | self.logger.info("Creating zero crossing analysis for {0}".format(self.AnalysedAudioFile.name)) 34 | self.create_analysis(frames) 35 | 36 | @staticmethod 37 | def create_zerox_analysis( 38 | frames, 39 | window_size=512, 40 | overlapFac=0.5, 41 | *args, 42 | **kwargs 43 | ): 44 | """Generate zero crossing value for window of the signal""" 45 | if hasattr(frames, '__call__'): 46 | frames = frames() 47 | hopSize = int(window_size - np.floor(overlapFac * window_size)) 48 | 49 | # zeros at beginning (thus center of 1st window should be for sample nr. 0) 50 | samples = np.append(np.zeros(np.floor(window_size/2.0)), frames) 51 | 52 | # cols for windowing 53 | cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1 54 | # zeros at end (thus samples can be fully covered by frames) 55 | samples = np.append(samples, np.zeros(window_size)) 56 | 57 | # TODO: Better handeling of zeros based on previous sign would improve 58 | # accuracy. 59 | epsilon = np.finfo(float).eps 60 | samples[samples == 0.] 
+= epsilon 61 | 62 | frames = stride_tricks.as_strided( 63 | samples, 64 | shape=(cols, window_size), 65 | strides=(samples.strides[0]*hopSize, samples.strides[0]) 66 | ).copy() 67 | zero_crossing = np.sum(np.abs(np.diff(np.sign(frames))), axis=1) 68 | return zero_crossing 69 | 70 | @staticmethod 71 | def calc_zerox_frame_times(zerox_frames, sample_frames, samplerate): 72 | 73 | """Calculate times for frames using sample size and samplerate.""" 74 | 75 | if hasattr(sample_frames, '__call__'): 76 | sample_frames = sample_frames() 77 | # Get number of frames for time and frequency 78 | timebins = zerox_frames.shape[0] 79 | # Create array ranging from 0 to number of time frames 80 | scale = np.arange(timebins+1) 81 | # divide the number of samples by the total number of frames, then 82 | # multiply by the frame numbers. 83 | zerox_times = (sample_frames.shape[0]/timebins) * scale[:-1] 84 | # Divide by the samplerate to give times in seconds 85 | zerox_times = zerox_times / samplerate 86 | return zerox_times 87 | 88 | def hdf5_dataset_formatter(self, *args, **kwargs): 89 | ''' 90 | Formats the output from the analysis method to save to the HDF5 file. 
#!/usr/bin/env python

"""Command line interface for generating an analysed audio file database."""

import argparse
import audiofile
import logging
from fileops import loggerops
import pdb
import os
from database import AudioDatabase
import config
import __builtin__

filename = os.path.splitext(__file__)[0]
logger = loggerops.create_logger(log_filename='./{0}.log'.format(filename))

###########################################################################
# File open and closing monitoring: wrap the builtin file/open objects so
# every open and close is logged (Python 2 only - relies on __builtin__.file).
openfiles = set()
oldfile = __builtin__.file

class newfile(oldfile):
    # Logged drop-in replacement for the builtin file object.
    def __init__(self, *args):
        self.x = args[0]
        logger.debug("OPENING %s" % str(self.x))
        oldfile.__init__(self, *args)
        openfiles.add(self)

    def close(self):
        logger.debug("CLOSING %s" % str(self.x))
        oldfile.close(self)
        openfiles.remove(self)
oldopen = __builtin__.open
def newopen(*args):
    return newfile(*args)
__builtin__.file = newfile
__builtin__.open = newopen

def printOpenFiles():
    # Dump every file currently held open (debugging aid).
    logger.debug("%d OPEN FILES: [%s]" % (len(openfiles), ", ".join(f.x for f in openfiles)))

###########################################################################

def main():
    """Parse arguments then generate database."""
    logger.info('Started')
    # Description fixed: the database is generated at argument 2 from the
    # audio files found at argument 1.
    parser = argparse.ArgumentParser(
        description='Generate a database at argument 2 based on files in '
        'argument 1.'
    )
    parser.add_argument(
        'source',
        type=str,
        help='Directory of audio files to be added to the database'
    )
    parser.add_argument(
        'target',
        type=str,
        nargs='?',
        default='',
        help='Directory to generate the database in. If the directory does not'
        ' exist then it will be created if possible'
    )
    parser.add_argument(
        '--analyse',
        '-a',
        nargs='*',
        # Fixed missing space between 'rms' and 'f0' in the help text.
        help='Specify analyses to be created. Valid analyses are: \'rms\' '
        '\'f0\' \'atk\' \'fft\'',
        default=[
            "rms",
            "zerox",
            "fft",
            "spccntr",
            "spcsprd",
            "spcflux",
            "spccf",
            "spcflatness",
            "f0",
            "peak",
            "centroid",
            "variance",
            "kurtosis",
            "skewness"
        ]
    )
    parser.add_argument(
        '--rms',
        nargs='+',
        help='Specify arguments for creating RMS analyses'
    )
    parser.add_argument(
        '--atk',
        nargs='+',
        help='Specify arguments for creating attack analyses'
    )
    parser.add_argument(
        '--zerox',
        nargs='+',
        help='Specify arguments for creating zero-crossing analyses'
    )
    parser.add_argument(
        '--fft',
        nargs='+',
        # Fixed copy-pasted help text that wrongly said "zero-crossing".
        help='Specify arguments for creating FFT analyses'
    )
    parser.add_argument(
        "--reanalyse", action="store_true",
        help="Force re-analysis of all analyses, overwriting any existing "
        "analyses"
    )
    args = parser.parse_args()

    # Create database object
    database = AudioDatabase(
        args.source,
        args.target,
        analysis_list=args.analyse,
        config=config
    )
    # Create/load a pre-existing database
    database.load_database(reanalyse=args.reanalyse)

if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | This section provides installation instructions for installing the concatenator 5 | project on your system. There are alternative methods that will most likely 6 | work for installing both the project and many of it's dependencies, however 7 | the method shown below has been tested and is therefore the most reliable 8 | method for installing this project. 9 | 10 | Prerequesites 11 | ------------- 12 | 13 | Tested working on: 14 | 15 | - System: MacBook Air (11-inch, Mid 2011) 16 | 17 | - Operating System: OSX 10.11.4 or Ubuntu 14.04 18 | 19 | - Processor: 1.6 GHz Intel Core i5 20 | 21 | - Memory: 4 GB 1333 MHz DDR3 22 | 23 | There are a few dependencies required to install concatenator: 24 | 25 | 1. Python 2.7.11 (tested) 26 | 2. libsndfile - used for audio file IO 27 | 3. The HDF5 Library - used for large file storage 28 | 4. The Sox audio library - used for pitch shifting 29 | 30 | Getting this package 31 | ++++++++++++++++++++ 32 | 33 | This package can be downloaded with git using the following command: 34 | 35 | .. code:: bash 36 | 37 | git clone https://github.com/Pezz89/pysound 38 | 39 | This will clone the project folder into the current directory. 40 | 41 | Brew Python 42 | +++++++++++ 43 | There are a number of ways to install python. The simplest is through homebrew 44 | (OSX) / Linuxbrew (most Linux distributions) using the following command: 45 | 46 | .. 
code:: bash 47 | 48 | brew install python 49 | 50 | Homebrew can be installed by following installation instructions from here: 51 | 52 | http://brew.sh/ 53 | 54 | Linuxbrew can be installed by following installation instructions from here: 55 | 56 | https://github.com/Linuxbrew/linuxbrew 57 | 58 | Pyenv Python 59 | ++++++++++++ 60 | An alternative that allows greater flexibility is to use pyenv which allows for 61 | easy switching between python versions and guarantees the exact version needed: 62 | 63 | .. code:: bash 64 | 65 | brew install pyenv 66 | pyenv install 2.7.11 67 | pyenv global 2.7.11 68 | 69 | Note that the following may need to be added to your ~/.bashrc file to add 70 | pyenv pythons to your path. 71 | 72 | .. code:: bash 73 | 74 | if which pyenv > /dev/null; then eval "$(pyenv init -)"; fi 75 | export PYENV_ROOT="$HOME/.pyenv" 76 | export PATH="$PYENV_ROOT/bin:$PATH" 77 | eval "$(pyenv init -)" 78 | 79 | Other dependencies 80 | ++++++++++++++++++ 81 | 82 | libsndfile, sox and the HDF5 libraries can also be installed via homebrew/linuxbrew: 83 | 84 | .. code:: bash 85 | 86 | brew install libsndfile 87 | brew tap homebrew/science 88 | brew install hdf5 89 | brew install sox 90 | 91 | Python library and dependencies installation 92 | -------------------------------------------- 93 | 94 | The python package and its dependencies can then be easily installed by 95 | running the ./install.sh script from the root directory of the project. Note 96 | that this will install the project in its project folder. To check that the 97 | project is working correctly, simply run run_tests. 98 | 99 | .. code:: bash 100 | 101 | ./install.sh 102 | ./run_tests 103 | 104 | There is a small chance that the installation may fail when installing 105 | dependencies such as scipy or numpy. In these cases the packages must be 106 | installed manually. When this has been done, simply re-run the install.sh 107 | script.
108 | 109 | Jupyter Notebook Examples 110 | --------------------------------------- 111 | 112 | The Jupyter notebook application is required in order to run the interactive 113 | examples. It is recommended that this is installed as part of the iPython 114 | library using: 115 | 116 | .. code:: bash 117 | 118 | pip install "ipython[all]" 119 | 120 | Notebooks can then be viewed from the Examples folder of the project by 121 | running: 122 | 123 | .. code:: bash 124 | 125 | jupyter notebook 126 | 127 | This will open a notebook session in the browser. 128 | -------------------------------------------------------------------------------- /src/sppysound/run_matching.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import audiofile 5 | import logging 6 | from fileops import loggerops 7 | import pdb 8 | import os 9 | import __builtin__ 10 | import config 11 | from database import AudioDatabase, Matcher 12 | pdb.pm 13 | 14 | import sys 15 | modpath = sys.argv[0] 16 | modpath = os.path.splitext(modpath)[0]+'.log' 17 | 18 | logger = loggerops.create_logger(log_filename=modpath) 19 | 20 | ''' 21 | ########################################################################### 22 | # File open and closing monitoring 23 | openfiles = set() 24 | oldfile = __builtin__.file 25 | 26 | class newfile(oldfile): 27 | def __init__(self, *args): 28 | self.x = args[0] 29 | logger.debug("OPENING %s" % str(self.x)) 30 | oldfile.__init__(self, *args) 31 | openfiles.add(self) 32 | 33 | def close(self): 34 | logger.debug("CLOSING %s" % str(self.x)) 35 | oldfile.close(self) 36 | openfiles.remove(self) 37 | oldopen = __builtin__.open 38 | def newopen(*args): 39 | return newfile(*args) 40 | __builtin__.file = newfile 41 | __builtin__.open = newopen 42 | 43 | def printOpenFiles(): 44 | logger.debug("%d OPEN FILES: [%s]" % (len(openfiles), ", ".join(f.x for f in openfiles))) 45 | 46 | 
########################################################################### 47 | ''' 48 | 49 | def main(): 50 | """Parse arguments then generate database.""" 51 | logger.info('Started') 52 | parser = argparse.ArgumentParser( 53 | description='Generate a database at argument 1 based on files in ' 54 | 'argument 2.' 55 | ) 56 | parser.add_argument( 57 | 'source', 58 | type=str, 59 | help='Source database directory' 60 | ) 61 | parser.add_argument( 62 | 'target', 63 | type=str, 64 | help='Target database directory' 65 | ) 66 | parser.add_argument( 67 | 'output', 68 | type=str, 69 | help='output database directory' 70 | ) 71 | parser.add_argument( 72 | '--analyse', 73 | '-a', 74 | nargs='*', 75 | help='Specify analyses to be used. Valid analyses are: \'rms\'' 76 | '\'f0\' \'fft\'', 77 | default=[ 78 | "rms", 79 | "zerox", 80 | "fft", 81 | "spccntr", 82 | "spcsprd", 83 | "spcflux", 84 | "spccf", 85 | "spcflatness", 86 | "f0", 87 | "peak", 88 | "centroid", 89 | "kurtosis", 90 | "variance", 91 | "skewness", 92 | "harm_ratio" 93 | ] 94 | ) 95 | parser.add_argument( 96 | "--rematch", action="store_true", 97 | help="Force re-matching, overwriting any existing match data " 98 | ) 99 | args = parser.parse_args() 100 | source_db = AudioDatabase( 101 | args.source, 102 | analysis_list=args.analyse, 103 | config=config 104 | ) 105 | # Create/load a pre-existing database 106 | source_db.load_database(reanalyse=False) 107 | 108 | target_db = AudioDatabase( 109 | args.target, 110 | analysis_list=args.analyse, 111 | config=config 112 | ) 113 | 114 | # Create/load a pre-existing database 115 | target_db.load_database(reanalyse=False) 116 | 117 | output_db = AudioDatabase( 118 | args.output, 119 | config=config 120 | ) 121 | 122 | # Create/load a pre-existing database 123 | output_db.load_database(reanalyse=False) 124 | 125 | 126 | matcher = Matcher( 127 | source_db, 128 | target_db, 129 | config.analysis_dict, 130 | output_db=output_db, 131 | config=config, 132 | 
quantity=config.matcher["match_quantity"], 133 | rematch=args.rematch 134 | ) 135 | matcher.match(matcher.brute_force_matcher, grain_size=config.matcher["grain_size"], overlap=config.matcher["overlap"]) 136 | 137 | #matcher.match(matcher.k_nearest_neighbour_matching, grain_size=config.matcher["grain_size"], overlap=config.matcher["overlap"]) 138 | 139 | if __name__ == "__main__": 140 | main() 141 | -------------------------------------------------------------------------------- /src/sppysound/analysis/PeakAnalysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import numpy as np 4 | import logging 5 | from scipy import signal 6 | from numpy.lib import stride_tricks 7 | import pdb 8 | 9 | 10 | from fileops import pathops 11 | 12 | from Analysis import Analysis 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class PeakAnalysis(Analysis): 18 | 19 | """ 20 | Peak descriptor class for generation of per-grain maximum peak audio analysis. 21 | 22 | This descriptor calculates the maximum peak for overlapping grains of an 23 | AnalysedAudioFile object. A full definition of peak analysis can be found in 24 | the documentation. 25 | 26 | Arguments: 27 | 28 | - analysis_group: the HDF5 file group to use for the storage of the 29 | analysis. 
30 | 31 | - config: The configuration module used to configure the analysis 32 | """ 33 | 34 | def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None): 35 | super(PeakAnalysis, self).__init__(AnalysedAudioFile,frames, analysis_group, 'Peak') 36 | self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name)) 37 | # Store reference to the file to be analysed 38 | self.AnalysedAudioFile = AnalysedAudioFile 39 | 40 | self.analysis_group = analysis_group 41 | self.logger.info("Creating Peak analysis for {0}".format(self.AnalysedAudioFile.name)) 42 | self.create_analysis(frames) 43 | 44 | @staticmethod 45 | def create_peak_analysis(frames, window_size=512, 46 | window=signal.triang, 47 | overlapFac=0.5): 48 | """ 49 | Calculate the Peak values of windowed segments of the audio file and 50 | save to disk. 51 | """ 52 | if hasattr(frames, '__call__'): 53 | frames = frames() 54 | # Calculate the period of the window in hz 55 | # lowest_freq = 1.0 / window_size 56 | # Filter frequencies lower than the period of the window 57 | # filter = ButterFilter() 58 | # filter.design_butter(lowest_freq, self.AnalysedAudioFile.samplerate) 59 | # TODO: Fix filter 60 | # frames = filter.filter_butter(frames) 61 | 62 | # Generate a window function to apply to peak windows before analysis 63 | hopSize = int(window_size - np.floor(overlapFac * window_size)) 64 | 65 | # zeros at beginning (thus center of 1st window should be for sample nr. 
0) 66 | samples = np.append(np.zeros(np.floor(window_size/2.0)), frames) 67 | 68 | # cols for windowing 69 | cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1 70 | # zeros at end (thus samples can be fully covered by frames) 71 | samples = np.append(samples, np.zeros(window_size)) 72 | 73 | frames = stride_tricks.as_strided( 74 | samples, 75 | shape=(cols, window_size), 76 | strides=(samples.strides[0]*hopSize, samples.strides[0]) 77 | ).copy() 78 | 79 | peak = np.max(np.abs(frames), axis=1) 80 | 81 | return peak 82 | 83 | def hdf5_dataset_formatter(self, *args, **kwargs): 84 | ''' 85 | Formats the output from the analysis method to save to the HDF5 file. 86 | ''' 87 | samplerate = self.AnalysedAudioFile.samplerate 88 | peak = self.create_peak_analysis(*args, **kwargs) 89 | peak_times = self.calc_peak_frame_times(peak, args[0], samplerate) 90 | return ({'frames': peak, 'times': peak_times}, {}) 91 | 92 | @staticmethod 93 | def calc_peak_frame_times(peakframes, sample_frames, samplerate): 94 | 95 | """Calculate times for frames using sample size and samplerate.""" 96 | 97 | if hasattr(sample_frames, '__call__'): 98 | sample_frames = sample_frames() 99 | # Get number of frames for time and frequency 100 | timebins = peakframes.shape[0] 101 | # Create array ranging from 0 to number of time frames 102 | scale = np.arange(timebins+1) 103 | # divide the number of samples by the total number of frames, then 104 | # multiply by the frame numbers. 
105 | peak_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float) 106 | # Divide by the samplerate to give times in seconds 107 | peak_times = peak_times / samplerate 108 | return peak_times 109 | -------------------------------------------------------------------------------- /src/sppysound/analysis/SpectralCentroidAnalysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | import logging 4 | import pdb 5 | 6 | from Analysis import Analysis 7 | 8 | class SpectralCentroidAnalysis(Analysis): 9 | """ 10 | Spectral centroid descriptor class for generation of spectral centroid 11 | audio analysis. 12 | 13 | This descriptor calculates the spectral centroid for overlapping grains of 14 | an AnalysedAudioFile object. A full definition of spectral centroid 15 | analysis can be found in the documentation. 16 | 17 | Arguments: 18 | 19 | - analysis_group: the HDF5 file group to use for the storage of the 20 | analysis. 21 | 22 | - config: The configuration module used to configure the analysis 23 | """ 24 | 25 | def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None): 26 | super(SpectralCentroidAnalysis, self).__init__(AnalysedAudioFile,frames, analysis_group, 'SpcCntr') 27 | # Create logger for module 28 | self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name)) 29 | # Store reference to the file to be analysed 30 | self.AnalysedAudioFile = AnalysedAudioFile 31 | self.nyquist_rate = self.AnalysedAudioFile.samplerate / 2. 
32 | try: 33 | fft = self.AnalysedAudioFile.analyses["fft"] 34 | except KeyError: 35 | raise KeyError("FFT analysis is required for spectral spread " 36 | "analysis.") 37 | 38 | self.analysis_group = analysis_group 39 | self.logger.info("Creating Spectral Centroid analysis for {0}".format(self.AnalysedAudioFile.name)) 40 | self.create_analysis( 41 | self.create_spccntr_analysis, 42 | fft.analysis['frames'], 43 | self.AnalysedAudioFile.samplerate 44 | ) 45 | self.spccntr_window_count = None 46 | 47 | def hdf5_dataset_formatter(self, analysis_method, *args, **kwargs): 48 | ''' 49 | Formats the output from the analysis method to save to the HDF5 file. 50 | ''' 51 | samplerate = self.AnalysedAudioFile.samplerate 52 | output = self.create_spccntr_analysis(*args, **kwargs) 53 | times = self.calc_spccntr_frame_times(output, self.AnalysedAudioFile.frames, samplerate) 54 | return ({'frames': output, 'times': times}, {}) 55 | 56 | @staticmethod 57 | def create_spccntr_analysis(fft, samplerate, output_format="ind"): 58 | ''' 59 | Calculate the spectral centroid of the fft frames. 60 | 61 | samplerate: the samplerate of the audio analysed. 62 | output_format = Choose either "freq" for output in Hz or "ind" for bin 63 | index output 64 | ''' 65 | fft = fft[:] 66 | # Get the positive magnitudes of each bin. 67 | magnitudes = np.abs(fft) 68 | # Get the highest magnitude. 69 | mag_max = np.max(magnitudes) 70 | if not mag_max: 71 | y = np.empty(magnitudes.shape[0]) 72 | y.fill(np.nan) 73 | return y 74 | # Calculate the centre frequency of each rfft bin. 
75 | if output_format == "freq": 76 | freqs = np.fft.rfftfreq((np.size(fft, axis=1)*2)-1, 1.0/samplerate) 77 | elif output_format == "ind": 78 | freqs = np.arange(np.size(fft, axis=1)) 79 | else: 80 | raise ValueError("\'{0}\' is not a valid output " 81 | "format.".format(output_format)) 82 | # Calculate the weighted mean 83 | y = np.sum(magnitudes*freqs, axis=1) / (np.sum(magnitudes, axis=1)) 84 | 85 | return y 86 | 87 | @staticmethod 88 | def calc_spccntr_frame_times(spccntr_frames, sample_frame_count, samplerate): 89 | 90 | """Calculate times for frames using sample size and samplerate.""" 91 | 92 | # Get number of frames for time and frequency 93 | timebins = spccntr_frames.shape[0] 94 | # Create array ranging from 0 to number of time frames 95 | scale = np.arange(timebins+1) 96 | # divide the number of samples by the total number of frames, then 97 | # multiply by the frame numbers. 98 | spccntr_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float) 99 | # Divide by the samplerate to give times in seconds 100 | spccntr_times = spccntr_times / samplerate 101 | return spccntr_times 102 | 103 | -------------------------------------------------------------------------------- /src/sppysound/Examples/Database analysis example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Creating a database of analysed audio files\n", 8 | "Database objects are used to group audio files and their analyses into a single object in order to perform further operations (such as matching).\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": { 15 | "collapsed": false 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "from sppysound.database import AudioDatabase\n", 20 | "import analysis_config" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 
| "Specify the directory to search recursively for audio files in." 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "file_dir = \"./ExampleFiles\"" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "Specify the directory to generate the database in." 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "database_dir = \"./ExampleDatabase\"" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "Create a list of analysis trings that determine the descriptors to be generated by the object" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 5, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "analysis_list = [\"rms\", \"f0\"]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 7, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "database = AudioDatabase(file_dir, database_dir, analysis_list=analysis_list, config=analysis_config)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "The load_database method will search for any pre-existing analyses and load these, aswell as generating new analyses that aren't already present. These will be organized and stored in the database directory in \"data\" and \"audio\" sub-directories." 
93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 8, 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "database.load_database(reanalyse=False)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "Once analysed, the database object can be used with objects such as the matcher object it's entries to other databases." 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "A \"config.py\" file is required to be used as a configuration module for the database. This will contain configurations for initialising audio files contained in the database. The example config.py looks like this:\n", 118 | "\n", 119 | "~~~python\n", 120 | "rms = {\n", 121 | " \"window_size\": 100,\n", 122 | " \"overlap\": 2,\n", 123 | "}\n", 124 | "\n", 125 | "analysis_dict = {\n", 126 | " \"f0\": \"log2_median\",\n", 127 | " \"rms\": \"mean\"\n", 128 | "}\n", 129 | "\n", 130 | "analysis = {\n", 131 | " \"reanalyse\": False\n", 132 | "}\n", 133 | "\n", 134 | "output_file = {\n", 135 | " \"samplerate\": 44100,\n", 136 | " \"format\": 131075,\n", 137 | " \"channels\": 1\n", 138 | "}\n", 139 | "~~~" 140 | ] 141 | } 142 | ], 143 | "metadata": { 144 | "kernelspec": { 145 | "display_name": "Python 2", 146 | "language": "python", 147 | "name": "python2" 148 | }, 149 | "language_info": { 150 | "codemirror_mode": { 151 | "name": "ipython", 152 | "version": 2 153 | }, 154 | "file_extension": ".py", 155 | "mimetype": "text/x-python", 156 | "name": "python", 157 | "nbconvert_exporter": "python", 158 | "pygments_lexer": "ipython2", 159 | "version": "2.7.10" 160 | } 161 | }, 162 | "nbformat": 4, 163 | "nbformat_minor": 0 164 | } 165 | -------------------------------------------------------------------------------- /src/sppysound/docs/DatabaseAnalysisExample.ipynb: -------------------------------------------------------------------------------- 1 
| { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Creating a database of analysed audio files\n", 8 | "Database objects are used to group audio files and their analyses into a single object in order to perform further operations (such as matching).\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": { 15 | "collapsed": false 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "from sppysound.database import AudioDatabase\n", 20 | "import analysis_config" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "Specify the directory to search recursively for audio files in." 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "file_dir = \"./ExampleFiles\"" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "Specify the directory to generate the database in." 
46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "database_dir = \"./ExampleDatabase\"" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "Create a list of analysis trings that determine the descriptors to be generated by the object" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "analysis_list = [\"rms\", \"f0\"]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "database = AudioDatabase(\n", 86 | " file_dir, \n", 87 | " database_dir, \n", 88 | " analysis_list=analysis_list, \n", 89 | " config=analysis_config\n", 90 | ")" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "The load_database method will search for any pre-existing analyses and load these, aswell as generating new analyses that aren't already present. These will be organized and stored in the database directory in \"data\" and \"audio\" sub-directories." 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 6, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "database.load_database(reanalyse=True)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "Once analysed, the database object can be used with objects such as the matcher object it's entries to other databases." 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "A \"config.py\" file is required to be used as a configuration module for the database. This will contain configurations for initialising audio files contained in the database. 
class F0HarmRatioAnalysis(Analysis):

    """
    Placeholder analysis class giving access to the harmonic ratio generated
    by the F0 analysis.

    Because the harmonic ratio is computed as part of the F0 analysis, this
    class does not have its own "create analysis" method as other analyses
    do; it is designed purely for retrieval of the F0 harmonic-ratio frames
    during matching.

    F0 analysis must already have been generated for the AnalysedAudioFile in
    order to use this object.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(F0HarmRatioAnalysis, self).__init__(
            AnalysedAudioFile, frames, analysis_group, 'F0HarmRatio')
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile

        # Frames whose harmonic ratio falls below this threshold are treated
        # as unpitched and masked to NaN during retrieval.
        if config:
            self.threshold = config.f0["ratio_threshold"]
        else:
            self.threshold = 0.

        self.analysis_group = analysis_group
        self.logger.info("Initialising F0HarmRatio analysis for {0}".format(self.AnalysedAudioFile.name))

    def get_analysis_grains(self, start, end):
        """
        Retrieve analysis frames for the period specified in start and end
        times (milliseconds). Arrays of start and end time pairs will produce
        an array of equivalent size containing frames for these times.
        """
        times = self.analysis_group["F0"]["times"][:]
        hr = self.analysis_group["F0"]["harmonic_ratio"][:]
        # Convert milliseconds to seconds to match the stored frame times.
        start = start / 1000
        end = end / 1000
        vtimes = times.reshape(-1, 1)

        # Mask out frames below the confidence threshold (operates on the
        # local copy read above, not the stored dataset).
        nan_inds = hr < self.threshold
        hr[nan_inds] = np.nan

        selection = np.transpose((vtimes >= start) & (vtimes <= end))
        if not selection.any():
            # No frame falls inside the requested window: fall back to the
            # two frames closest to the window's centre.
            frame_center = start + (end-start)/2.
            closest_frames = np.abs(vtimes-frame_center).argsort()[:2]
            selection[closest_frames] = True

        return ((hr, times), selection)

    @staticmethod
    def calc_F0HarmRatio_frame_times(F0HarmRatioframes, sample_frames, samplerate):
        """Calculate times for frames using sample size and samplerate."""
        # Frames may be supplied lazily as a callable.
        if hasattr(sample_frames, '__call__'):
            sample_frames = sample_frames()
        # Get number of frames for time and frequency
        timebins = F0HarmRatioframes.shape[0]
        # Frame indexes 0..timebins-1. (Replaces the original
        # arange(timebins+1)[:-1]; the redundant `samplerate *= 1` no-op has
        # also been removed.)
        scale = np.arange(timebins)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        F0HarmRatio_times = (sample_frames.shape[0]/timebins) * scale
        # Divide by the samplerate to give times in seconds
        F0HarmRatio_times = F0HarmRatio_times / samplerate
        return F0HarmRatio_times

    def analysis_formatter(self, data, selection, format):
        """Calculate the average analysis value of the grain using the match
        format specified ('mean', 'median', 'log2_mean' or 'log2_median')."""
        harm_ratio, times = data
        # Get indexes of all valid frames (that aren't nan)
        valid_inds = np.isfinite(harm_ratio)

        format_style_dict = {
            'mean': np.mean,
            'median': np.median,
            'log2_mean': self.log2_mean,
            'log2_median': self.log2_median,
        }

        if not selection.size:
            # TODO: Add warning here — an empty selection silently yields NaN.
            return np.nan

        output = np.apply_along_axis(
            self.formatter_func,
            1,
            selection,
            harm_ratio,
            valid_inds,
            formatter=format_style_dict[format]
        )

        return output
# Specify analysis parameters for root mean square analysis.
rms = {
    # Analysis window sizes can be changed for each analysis individually.
    # These do not need to match the grain size of the matcher or synthesis.
    "window_size": 100,
    "overlap": 4,
}

f0 = {
    "window_size": 4096,
    "overlap": 4,
    # Currently all frames below this ratio are discarded and left as silence.
    # Different databases will require different values for the best results.
    # Noisier databases will need lower values than more tonal databases.
    "ratio_threshold": 0.81
}

# Specify analysis parameters for variance analysis.
variance = {
    "window_size": 100,
    "overlap": 4
}

# Specify analysis parameters for temporal kurtosis analysis.
kurtosis = {
    "window_size": 100,
    "overlap": 4
}

# Specify analysis parameters for temporal skewness analysis.
skewness = {
    "window_size": 100,
    "overlap": 4
}

# Specify analysis parameters for FFT analysis.
fft = {
    # The FFT window size determines the window size for all spectral analyses.
    "window_size": 4096
}

database = {
    # Enables creation of symbolic links to files not in the database rather
    # than making physical copies.
    "symlink": True
}

# Sets the weighting for each analysis. A higher weighting gives an analysis
# higher precedence when finding the best matches.
matcher_weightings = {
    "f0" : 8,
    "spccntr" : 1.,
    "spcsprd" : 1.,
    "spcflux" : 3.,
    "spccf" : 3.,
    "spcflatness": 3.,
    "zerox" : 1.,
    "rms" : 0.1,
    "peak": 0.1,
    "centroid": 0.5,
    "kurtosis": 2.,
    "skewness": 2.,
    "variance": 0.,
    "harm_ratio": 2
}

# Specifies the method for averaging analysis frames to create a single value
# for comparing to other grains. Possible formatters are: 'mean', 'median',
# 'log2_mean', 'log2_median'
analysis_dict = {
    # log2_median formats using the mel scale. This is useful for analyses
    # such as F0.
    "f0": "median",
    "rms": "mean",
    "zerox": "mean",
    "spccntr": "median",
    "spcsprd": "median",
    "spcflux": "median",
    "spccf": "median",
    "spcflatness": "median",
    "peak": "mean",
    "centroid": "mean",
    "kurtosis": "mean",
    "skewness": "mean",
    "variance": "mean",
    "harm_ratio": "mean"
}

analysis = {
    # Force the deletion of any pre-existing analyses to create new ones. This
    # is needed for overwriting old analyses generated with different
    # parameters to the current ones.
    "reanalyse": False
}

matcher = {
    # Force the re-matching of analyses
    "rematch": False,
    # This value must be the same as the synthesis grain size to avoid the
    # speeding up or slowing down of the resulting file in relation to the
    # original.
    "grain_size": 100,
    "overlap": 4,
    # Defines the number of matches to keep for synthesis. Note that this must
    # also be specified in the synthesis config
    "match_quantity": 2,
    # Choose the algorithm used to perform matching. kdtree is recommended for
    # larger datasets.
    "method": 'kdtree'
}

synthesizer = {
    # Artificially scale the output grain by the difference in RMS values
    # between source and target.
    "enforce_intensity": True,
    # Specify the ratio limit that the grain can be scaled by.
    "enf_intensity_ratio_limit": 1000.,
    # Artificially modify the pitch by the difference in f0 values between
    # source and target.
    "enforce_f0": True,
    # Specify the ratio limit that the grain can be modified by.
    "enf_f0_ratio_limit": 1.,
    "grain_size": 100,
    "overlap": 4,
    # Normalize output: avoid clipping of the final output by scaling the
    # final frames.
    "normalize" : False,
    # Defines the number of potential grains to choose from matches when
    # synthesizing output.
    "match_quantity": 2
}

# Specifies the format for the output file. Changing this has not been tested
# so may produce errors/undesirable results.
output_file = {
    "samplerate": 44100,
    "format": 131075,
    "channels": 1
}
25 | kurtosis = { 26 | "window_size": 100, 27 | "overlap": 8 28 | } 29 | 30 | # Specify analysis parameters for temporal skewness analysis. 31 | skewness = { 32 | "window_size": 100, 33 | "overlap": 8 34 | } 35 | 36 | # Specify analysis parameters for FFT analysis. 37 | fft = { 38 | # The FFT window size determines the window size for all spectral analyses. 39 | "window_size": 4096 40 | } 41 | 42 | database = { 43 | # Enables creation of symbolic links to files not in the database rather 44 | # than making pysical copies. 45 | "symlink": True 46 | } 47 | 48 | # Sets the weighting for each analysis. a higher weighting gives an analysis 49 | # higher presendence when finding the best matches. 50 | matcher_weightings = { 51 | "f0" : 0.5, 52 | "spccntr" : 1., 53 | "spcsprd" : 1., 54 | "spcflux" : 3., 55 | "spccf" : 3., 56 | "spcflatness": 3., 57 | "zerox" : 1., 58 | "rms" : 0.1, 59 | "peak": 0.1, 60 | "centroid": 0.5, 61 | "kurtosis": 2., 62 | "skewness": 2., 63 | "variance": 0., 64 | "harm_ratio": 2 65 | } 66 | 67 | # Specifies the method for averaging analysis frames to create a single value 68 | # for comparing to other grains. Possible formatters are: 'mean', 'median', 69 | # 'log2_mean', 'log2_median' 70 | analysis_dict = { 71 | # log2_median formats using mel scale. This is useful for analyses such as 72 | # F0. 73 | "f0": "log2_median", 74 | "rms": "mean", 75 | "zerox": "mean", 76 | "spccntr": "median", 77 | "spcsprd": "median", 78 | "spcflux": "median", 79 | "spccf": "median", 80 | "spcflatness": "median", 81 | "peak": "mean", 82 | "centroid": "mean", 83 | "kurtosis": "mean", 84 | "skewness": "mean", 85 | "variance": "mean", 86 | "harm_ratio": "mean" 87 | } 88 | 89 | analysis = { 90 | # Force the deletion of any pre-existing analyses to create new ones. This 91 | # is needed for overwriting old analyses generated with different 92 | # parameters to the current ones. 
93 | "reanalyse": False 94 | } 95 | 96 | matcher = { 97 | # Force the re-matching of analyses 98 | "rematch": False, 99 | # This value must be the same as the synthesis grain size to avoid the 100 | # speeding up or slowing down of the resulting file in relation to the 101 | # original. 102 | "grain_size": 100, 103 | "overlap": 8, 104 | # Defines the number of matches to keep for synthesis. Note that this must 105 | # also be specified in the synthesis config 106 | "match_quantity": 5, 107 | # Choose the algorithm used to perform matching. kdtree is recommended for 108 | # larger datasets. 109 | "method": 'kdtree' 110 | } 111 | 112 | synthesizer = { 113 | # Artificially scale the output grain by the difference in RMS values 114 | # between source and target. 115 | "enforce_intensity": True, 116 | # Specify the ratio limit that is the grain can be scaled by. 117 | "enf_intensity_ratio_limit": 1000., 118 | # Artificially modify the pitch by the difference in f0 values between 119 | # source and target. 120 | "enforce_f0": True, 121 | # Specify the ratio limit that is the grain can be modified by. 122 | "enf_f0_ratio_limit": 10., 123 | "grain_size": 100, 124 | "overlap": 8, 125 | # Normalize output, avoid clipping of final output by scaling the final 126 | # frames. 127 | "normalize" : True, 128 | # Defines the number of potential grains to choose from matches when 129 | # synthesizing output. 130 | "match_quantity": 5 131 | } 132 | 133 | # Specifies the format for the output file. Changing this has not been tested 134 | # so may produce errors/undesirable results. 135 | output_file = { 136 | "samplerate": 44100, 137 | "format": 131075, 138 | "channels": 1 139 | } 140 | -------------------------------------------------------------------------------- /src/sppysound/docs/matching_config.py: -------------------------------------------------------------------------------- 1 | # Specify analysis parameters for root mean square analysis. 
2 | rms = { 3 | # Analysis window sizes can be changed for each analysis individually. 4 | # These do not need to match the grain size of the matcher or synthesis. 5 | "window_size": 100, 6 | "overlap": 8, 7 | } 8 | 9 | f0 = { 10 | "window_size": 4096, 11 | "overlap": 8, 12 | # Currently all frames below this ratio are digaurded and left as silence. 13 | # Different databases will require different values for the best results. 14 | # Noisier databases will need lower values than more tonal databases. 15 | "ratio_threshold": 0.45 16 | } 17 | 18 | # Specify analysis parameters for variance analysis. 19 | variance = { 20 | "window_size": 100, 21 | "overlap": 8 22 | } 23 | 24 | # Specify analysis parameters for temporal kurtosis analysis. 25 | kurtosis = { 26 | "window_size": 100, 27 | "overlap": 8 28 | } 29 | 30 | # Specify analysis parameters for temporal skewness analysis. 31 | skewness = { 32 | "window_size": 100, 33 | "overlap": 8 34 | } 35 | 36 | # Specify analysis parameters for FFT analysis. 37 | fft = { 38 | # The FFT window size determines the window size for all spectral analyses. 39 | "window_size": 4096 40 | } 41 | 42 | database = { 43 | # Enables creation of symbolic links to files not in the database rather 44 | # than making pysical copies. 45 | "symlink": True 46 | } 47 | 48 | # Sets the weighting for each analysis. a higher weighting gives an analysis 49 | # higher presendence when finding the best matches. 50 | matcher_weightings = { 51 | "f0" : 0.5, 52 | "spccntr" : 1., 53 | "spcsprd" : 1., 54 | "spcflux" : 3., 55 | "spccf" : 3., 56 | "spcflatness": 3., 57 | "zerox" : 1., 58 | "rms" : 0.1, 59 | "peak": 0.1, 60 | "centroid": 0.5, 61 | "kurtosis": 2., 62 | "skewness": 2., 63 | "variance": 0., 64 | "harm_ratio": 2 65 | } 66 | 67 | # Specifies the method for averaging analysis frames to create a single value 68 | # for comparing to other grains. 
Possible formatters are: 'mean', 'median', 69 | # 'log2_mean', 'log2_median' 70 | analysis_dict = { 71 | # log2_median formats using mel scale. This is useful for analyses such as 72 | # F0. 73 | "f0": "log2_median", 74 | "rms": "mean", 75 | "zerox": "mean", 76 | "spccntr": "median", 77 | "spcsprd": "median", 78 | "spcflux": "median", 79 | "spccf": "median", 80 | "spcflatness": "median", 81 | "peak": "mean", 82 | "centroid": "mean", 83 | "kurtosis": "mean", 84 | "skewness": "mean", 85 | "variance": "mean", 86 | "harm_ratio": "mean" 87 | } 88 | 89 | analysis = { 90 | # Force the deletion of any pre-existing analyses to create new ones. This 91 | # is needed for overwriting old analyses generated with different 92 | # parameters to the current ones. 93 | "reanalyse": False 94 | } 95 | 96 | matcher = { 97 | # Force the re-matching of analyses 98 | "rematch": False, 99 | # This value must be the same as the synthesis grain size to avoid the 100 | # speeding up or slowing down of the resulting file in relation to the 101 | # original. 102 | "grain_size": 100, 103 | "overlap": 8, 104 | # Defines the number of matches to keep for synthesis. Note that this must 105 | # also be specified in the synthesis config 106 | "match_quantity": 5, 107 | # Choose the algorithm used to perform matching. kdtree is recommended for 108 | # larger datasets. 109 | "method": 'kdtree' 110 | } 111 | 112 | synthesizer = { 113 | # Artificially scale the output grain by the difference in RMS values 114 | # between source and target. 115 | "enforce_intensity": True, 116 | # Specify the ratio limit that is the grain can be scaled by. 117 | "enf_intensity_ratio_limit": 1000., 118 | # Artificially modify the pitch by the difference in f0 values between 119 | # source and target. 120 | "enforce_f0": True, 121 | # Specify the ratio limit that is the grain can be modified by. 
122 | "enf_f0_ratio_limit": 10., 123 | "grain_size": 100, 124 | "overlap": 8, 125 | # Normalize output, avoid clipping of final output by scaling the final 126 | # frames. 127 | "normalize" : True, 128 | # Defines the number of potential grains to choose from matches when 129 | # synthesizing output. 130 | "match_quantity": 5 131 | } 132 | 133 | # Specifies the format for the output file. Changing this has not been tested 134 | # so may produce errors/undesirable results. 135 | output_file = { 136 | "samplerate": 44100, 137 | "format": 131075, 138 | "channels": 1 139 | } 140 | -------------------------------------------------------------------------------- /src/sppysound/docs/synthesis_config.py: -------------------------------------------------------------------------------- 1 | # Specify analysis parameters for root mean square analysis. 2 | rms = { 3 | # Analysis window sizes can be changed for each analysis individually. 4 | # These do not need to match the grain size of the matcher or synthesis. 5 | "window_size": 100, 6 | "overlap": 8, 7 | } 8 | 9 | f0 = { 10 | "window_size": 4096, 11 | "overlap": 8, 12 | # Currently all frames below this ratio are digaurded and left as silence. 13 | # Different databases will require different values for the best results. 14 | # Noisier databases will need lower values than more tonal databases. 15 | "ratio_threshold": 0.45 16 | } 17 | 18 | # Specify analysis parameters for variance analysis. 19 | variance = { 20 | "window_size": 100, 21 | "overlap": 8 22 | } 23 | 24 | # Specify analysis parameters for temporal kurtosis analysis. 25 | kurtosis = { 26 | "window_size": 100, 27 | "overlap": 8 28 | } 29 | 30 | # Specify analysis parameters for temporal skewness analysis. 31 | skewness = { 32 | "window_size": 100, 33 | "overlap": 8 34 | } 35 | 36 | # Specify analysis parameters for FFT analysis. 37 | fft = { 38 | # The FFT window size determines the window size for all spectral analyses. 
39 | "window_size": 4096 40 | } 41 | 42 | database = { 43 | # Enables creation of symbolic links to files not in the database rather 44 | # than making pysical copies. 45 | "symlink": True 46 | } 47 | 48 | # Sets the weighting for each analysis. a higher weighting gives an analysis 49 | # higher presendence when finding the best matches. 50 | matcher_weightings = { 51 | "f0" : 0.5, 52 | "spccntr" : 1., 53 | "spcsprd" : 1., 54 | "spcflux" : 3., 55 | "spccf" : 3., 56 | "spcflatness": 3., 57 | "zerox" : 1., 58 | "rms" : 0.1, 59 | "peak": 0.1, 60 | "centroid": 0.5, 61 | "kurtosis": 2., 62 | "skewness": 2., 63 | "variance": 0., 64 | "harm_ratio": 2 65 | } 66 | 67 | # Specifies the method for averaging analysis frames to create a single value 68 | # for comparing to other grains. Possible formatters are: 'mean', 'median', 69 | # 'log2_mean', 'log2_median' 70 | analysis_dict = { 71 | # log2_median formats using mel scale. This is useful for analyses such as 72 | # F0. 73 | "f0": "log2_median", 74 | "rms": "mean", 75 | "zerox": "mean", 76 | "spccntr": "median", 77 | "spcsprd": "median", 78 | "spcflux": "median", 79 | "spccf": "median", 80 | "spcflatness": "median", 81 | "peak": "mean", 82 | "centroid": "mean", 83 | "kurtosis": "mean", 84 | "skewness": "mean", 85 | "variance": "mean", 86 | "harm_ratio": "mean" 87 | } 88 | 89 | analysis = { 90 | # Force the deletion of any pre-existing analyses to create new ones. This 91 | # is needed for overwriting old analyses generated with different 92 | # parameters to the current ones. 93 | "reanalyse": False 94 | } 95 | 96 | matcher = { 97 | # Force the re-matching of analyses 98 | "rematch": False, 99 | # This value must be the same as the synthesis grain size to avoid the 100 | # speeding up or slowing down of the resulting file in relation to the 101 | # original. 102 | "grain_size": 100, 103 | "overlap": 8, 104 | # Defines the number of matches to keep for synthesis. 
Note that this must 105 | # also be specified in the synthesis config 106 | "match_quantity": 5, 107 | # Choose the algorithm used to perform matching. kdtree is recommended for 108 | # larger datasets. 109 | "method": 'kdtree' 110 | } 111 | 112 | synthesizer = { 113 | # Artificially scale the output grain by the difference in RMS values 114 | # between source and target. 115 | "enforce_intensity": True, 116 | # Specify the ratio limit that is the grain can be scaled by. 117 | "enf_intensity_ratio_limit": 1000., 118 | # Artificially modify the pitch by the difference in f0 values between 119 | # source and target. 120 | "enforce_f0": True, 121 | # Specify the ratio limit that is the grain can be modified by. 122 | "enf_f0_ratio_limit": 10., 123 | "grain_size": 100, 124 | "overlap": 8, 125 | # Normalize output, avoid clipping of final output by scaling the final 126 | # frames. 127 | "normalize" : True, 128 | # Defines the number of potential grains to choose from matches when 129 | # synthesizing output. 130 | "match_quantity": 5 131 | } 132 | 133 | # Specifies the format for the output file. Changing this has not been tested 134 | # so may produce errors/undesirable results. 
135 | output_file = { 136 | "samplerate": 44100, 137 | "format": 131075, 138 | "channels": 1 139 | } 140 | -------------------------------------------------------------------------------- /src/sppysound/analysis/CentroidAnalysis.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function, division 3 | import os 4 | import numpy as np 5 | import logging 6 | from scipy import signal 7 | from numpy.lib import stride_tricks 8 | import pdb 9 | 10 | 11 | from AnalysisTools import ButterFilter 12 | from fileops import pathops 13 | 14 | from Analysis import Analysis 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class CentroidAnalysis(Analysis): 20 | 21 | """ 22 | Temporal centroid descriptor class for generation of temporal centroid 23 | audio analysis. 24 | 25 | This descriptor calculates the temporal centroid for overlapping grains of 26 | an AnalysedAudioFile object. A full definition of temporal centroid 27 | analysis can be found in the documentation. 28 | 29 | Arguments: 30 | 31 | - analysis_group: the HDF5 file group to use for the storage of the 32 | analysis. 
33 | 34 | - config: The configuration module used to configure the analysis 35 | """ 36 | 37 | def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None): 38 | super(CentroidAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'Centroid') 39 | self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name)) 40 | # Store reference to the file to be analysed 41 | self.AnalysedAudioFile = AnalysedAudioFile 42 | 43 | self.analysis_group = analysis_group 44 | self.logger.info("Creating Centroid analysis for {0}".format(self.AnalysedAudioFile.name)) 45 | self.create_analysis(frames) 46 | 47 | @staticmethod 48 | def create_centroid_analysis(frames, window_size=512, 49 | window=signal.triang, 50 | overlapFac=0.5): 51 | """ 52 | Calculate the Centroid values of windowed segments of the audio file and 53 | save to disk. 54 | """ 55 | if hasattr(frames, '__call__'): 56 | frames = frames() 57 | 58 | # Calculate the period of the window in hz 59 | # lowest_freq = 1.0 / window_size 60 | # Filter frequencies lower than the period of the window 61 | # filter = ButterFilter() 62 | # filter.design_butter(lowest_freq, self.AnalysedAudioFile.samplerate) 63 | # TODO: Fix filter 64 | # frames = filter.filter_butter(frames) 65 | 66 | # Generate a window function to apply to centroid windows before analysis 67 | win = window(window_size) 68 | hopSize = int(window_size - np.floor(overlapFac * window_size)) 69 | 70 | # zeros at beginning (thus center of 1st window should be for sample nr. 
0) 71 | samples = np.append(np.zeros(np.floor(window_size/2.0)), frames) 72 | 73 | # cols for windowing 74 | cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1 75 | # zeros at end (thus samples can be fully covered by frames) 76 | samples = np.append(samples, np.zeros(window_size)) 77 | 78 | frames = stride_tricks.as_strided( 79 | samples, 80 | shape=(cols, window_size), 81 | strides=(samples.strides[0]*hopSize, samples.strides[0]) 82 | ).copy() 83 | 84 | frames *= win 85 | weighted_sum = np.sum((np.arange(frames.shape[1])+1) * frames, axis=1) 86 | 87 | centroid = weighted_sum / np.sum(frames, axis=1) 88 | 89 | return centroid 90 | 91 | def hdf5_dataset_formatter(self, *args, **kwargs): 92 | ''' 93 | Formats the output from the analysis method to save to the HDF5 file. 94 | ''' 95 | samplerate = self.AnalysedAudioFile.samplerate 96 | centroid = self.create_centroid_analysis(*args, **kwargs) 97 | centroid_times = self.calc_centroid_frame_times(centroid, args[0], samplerate) 98 | return ({'frames': centroid, 'times': centroid_times}, {}) 99 | 100 | @staticmethod 101 | def calc_centroid_frame_times(centroidframes, sample_frames, samplerate): 102 | 103 | """Calculate times for frames using sample size and samplerate.""" 104 | 105 | if hasattr(sample_frames, '__call__'): 106 | sample_frames = sample_frames() 107 | # Get number of frames for time and frequency 108 | timebins = centroidframes.shape[0] 109 | # Create array ranging from 0 to number of time frames 110 | scale = np.arange(timebins+1) 111 | # divide the number of samples by the total number of frames, then 112 | # multiply by the frame numbers. 
class VarianceAnalysis(Analysis):

    """
    Variance descriptor class for generation of variance audio analysis.

    This descriptor calculates the variance analysis for overlapping
    grains of an AnalysedAudioFile object. A full definition of variance
    analysis can be found in the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(VarianceAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'variance')
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile

        if config:
            # Configured window size is in milliseconds; convert to samples.
            self.window_size = config.variance["window_size"] * self.AnalysedAudioFile.samplerate / 1000
            self.overlap = 1. / config.variance["overlap"]
        else:
            # Defaults matching RMSAnalysis. Previously these attributes were
            # never set when config was None, raising AttributeError below.
            self.window_size = 512
            self.overlap = 0.5

        self.analysis_group = analysis_group
        self.logger.info("Creating variance analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(frames, self.window_size, overlapFac=self.overlap)

    @staticmethod
    def create_variance_analysis(frames, window_size=512,
                                 overlapFac=0.5):
        """
        Generate an energy contour analysis.

        Calculate the variance values of windowed segments of the audio file.

        frames: 1D array of samples, or a callable returning one.
        window_size: analysis window length in samples.
        overlapFac: fraction of overlap between consecutive windows.

        Returns a 1D array with one variance value per window.
        """
        # TODO: filter frequencies below the period of the window
        # (ButterFilter) — previously disabled pending a fix.

        # Frames may be supplied lazily as a callable.
        if hasattr(frames, '__call__'):
            frames = frames()

        # Array sizes must be ints: np.floor/np.ceil return floats, which
        # modern NumPy rejects for np.zeros and as_strided shapes.
        window_size = int(window_size)
        hopSize = int(window_size - np.floor(overlapFac * window_size))

        # zeros at beginning (thus center of 1st window should be for sample
        # nr. 0)
        samples = np.append(np.zeros(int(np.floor(window_size / 2.0))), frames)

        # cols for windowing
        cols = int(np.ceil((len(samples) - window_size) / float(hopSize)) + 1)
        # zeros at end (thus samples can be fully covered by frames)
        samples = np.append(samples, np.zeros(window_size))

        # Overlapped 2D view of the signal: one analysis window per row.
        frames = stride_tricks.as_strided(
            samples,
            shape=(cols, window_size),
            strides=(samples.strides[0]*hopSize, samples.strides[0])
        ).copy()

        # Population variance of each window: mean squared deviation from the
        # per-window mean (true division is in effect via __future__).
        frame_mean = np.mean(frames, axis=1)
        variance = (1 / window_size) * np.sum((frames-np.vstack(frame_mean))**2, axis=1)

        return variance

    def hdf5_dataset_formatter(self, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': variance, 'times': variance_times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        variance = self.create_variance_analysis(*args, **kwargs)
        variance_times = self.calc_variance_frame_times(variance, args[0], samplerate)
        return ({'frames': variance, 'times': variance_times}, {})

    @staticmethod
    def calc_variance_frame_times(varianceframes, sample_frames, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # sample_frames may be supplied lazily as a callable.
        if hasattr(sample_frames, '__call__'):
            sample_frames = sample_frames()
        # Get number of analysis frames
        timebins = varianceframes.shape[0]
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        variance_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        variance_times = variance_times / samplerate
        return variance_times
class SpectralCrestFactorAnalysis(Analysis):
    """
    Spectral crest factor descriptor class for generation of spectral crest
    factor audio analysis.

    This descriptor calculates the spectral crest factor for overlapping
    grains of an AnalysedAudioFile object. A full definition can be found in
    the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis

    Raises KeyError if the prerequisite FFT analysis is not present.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(SpectralCrestFactorAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'SpcCrestFactor')
        # Create logger for module
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile
        self.nyquist_rate = self.AnalysedAudioFile.samplerate / 2.
        # The crest factor is derived from pre-computed FFT frames.
        try:
            fft = self.AnalysedAudioFile.analyses["fft"]
        except KeyError:
            raise KeyError("FFT analysis is required for spectral spread "
                           "analysis.")

        self.analysis_group = analysis_group
        self.logger.info("Creating Spectral CrestFactor analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(
            self.create_spccf_analysis,
            fft.analysis['frames'],
        )
        self.spccf_window_count = None

    def hdf5_dataset_formatter(self, analysis_method, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': output, 'times': times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        output = self.create_spccf_analysis(*args, **kwargs)
        times = self.calc_spccf_frame_times(output, self.AnalysedAudioFile.frames, samplerate)
        return ({'frames': output, 'times': times}, {})

    @staticmethod
    def create_spccf_analysis(fft):
        '''
        Calculate the spectral crest factor of the fft frames.

        Returns one value per frame: max bin magnitude / sum of magnitudes.
        Returns an all-nan array when the FFT is entirely zero.
        '''
        fft = fft[:]
        # Get the positive magnitudes of each bin.
        magnitudes = np.abs(fft)
        # Silent input: no peak exists, so mark every frame as nan.
        if not np.nonzero(magnitudes)[0].size:
            y = np.empty(magnitudes.shape[0])
            y.fill(np.nan)
            return y
        # Get the highest magnitude value for each spectral frame
        max_bins = np.max(magnitudes, axis=1)
        mag_sum = np.sum(magnitudes, axis=1)
        # Individual all-zero frames divide 0/0 -> nan; suppress the warning.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            spectral_cf = max_bins / mag_sum

        return spectral_cf

    @staticmethod
    def calc_spccf_frame_times(spccf_frames, sample_frame_count, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # Get number of analysis frames
        timebins = spccf_frames.shape[0]
        # Guard against empty analyses (consistent with SpectralFluxAnalysis).
        if not timebins:
            return np.array([])
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        spccf_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        spccf_times = spccf_times / samplerate
        return spccf_times

    def mean_formatter(self, data):
        """Calculate the mean value of the analysis data"""

        values = data[0]

        output = np.empty(len(values))
        for ind, i in enumerate(values):
            mean_i = np.mean(i)
            if mean_i == 0:
                output[ind] = np.nan
            else:
                # Log-scaled and normalised by the Nyquist rate.
                output[ind] = np.log10(np.mean(i))/self.nyquist_rate
        return output

    def median_formatter(self, data):
        """Calculate the median value of the analysis data"""
        values = data[0]

        # Size the output by the number of value groups, not the enclosing
        # tuple (was np.empty(len(data)) — a wrongly-sized array).
        output = np.empty(len(values))
        for ind, i in enumerate(values):
            median_i = np.median(i)
            if median_i == 0:
                output[ind] = np.nan
            else:
                output[ind] = np.log10(np.median(i))/self.nyquist_rate
        return output
class SpectralFlatnessAnalysis(Analysis):
    """
    Spectral flatness descriptor class for generation of spectral flatness
    audio analysis.

    This descriptor calculates the spectral flatness for overlapping grains
    of an AnalysedAudioFile object. A full definition can be found in the
    documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis

    Raises KeyError if the prerequisite FFT analysis is not present.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(SpectralFlatnessAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'SpcFlatness')
        # Create logger for module
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile
        self.nyquist_rate = self.AnalysedAudioFile.samplerate / 2.
        # Spectral flatness is derived from pre-computed FFT frames.
        try:
            fft = self.AnalysedAudioFile.analyses["fft"]
        except KeyError:
            raise KeyError("FFT analysis is required for spectral spread "
                           "analysis.")

        self.analysis_group = analysis_group
        self.logger.info("Creating Spectral Flatness analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(
            self.create_spcflatness_analysis,
            fft.analysis['frames'],
        )
        self.spcflatness_window_count = None

    def hdf5_dataset_formatter(self, analysis_method, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': output, 'times': times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        output = self.create_spcflatness_analysis(*args, **kwargs)
        times = self.calc_spcflatness_frame_times(output, self.AnalysedAudioFile.frames, samplerate)
        return ({'frames': output, 'times': times}, {})

    @staticmethod
    def create_spcflatness_analysis(fft):
        '''
        Calculate the spectral flatness of the fft frames.

        Flatness is the ratio of the geometric mean to the arithmetic mean of
        the bin magnitudes; returns one value per frame, or an all-nan array
        when the FFT is entirely zero.
        '''
        fft = fft[:]
        # Get the positive magnitudes of each bin.
        magnitudes = np.abs(fft)
        # Silent input: flatness is undefined, mark every frame as nan.
        if not np.nonzero(magnitudes)[0].size:
            y = np.empty(magnitudes.shape[0])
            y.fill(np.nan)
            return y

        # Zero bins produce log(0) = -inf; the resulting warnings are
        # expected, so suppress them for this calculation only.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            # Calculate the geometric mean of magnitudes
            geo_mean = np.e**np.mean(np.log(magnitudes), axis=1)
            # Calculate the arithmetic mean of magnitudes
            arith_mean = np.mean(magnitudes, axis=1)
            # Calculate the ratio between the two.
            spectral_flatness = geo_mean / arith_mean

        return spectral_flatness

    @staticmethod
    def calc_spcflatness_frame_times(spcflatness_frames, sample_frame_count, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # Get number of analysis frames
        timebins = spcflatness_frames.shape[0]
        # Guard against empty analyses (consistent with SpectralFluxAnalysis).
        if not timebins:
            return np.array([])
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        spcflatness_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        spcflatness_times = spcflatness_times / samplerate
        return spcflatness_times

    def mean_formatter(self, data):
        """Calculate the mean value of the analysis data"""

        values = data[0]

        output = np.empty(len(values))
        for ind, i in enumerate(values):
            mean_i = np.mean(i)
            if mean_i == 0:
                output[ind] = np.nan
            else:
                # Log-scaled and normalised by the Nyquist rate.
                output[ind] = np.log10(np.mean(i))/self.nyquist_rate
        return output

    def median_formatter(self, data):
        """Calculate the median value of the analysis data"""
        values = data[0]

        # Size the output by the number of value groups, not the enclosing
        # tuple (was np.empty(len(data)) — a wrongly-sized array).
        output = np.empty(len(values))
        for ind, i in enumerate(values):
            median_i = np.median(i)
            if median_i == 0:
                output[ind] = np.nan
            else:
                output[ind] = np.log10(np.median(i))/self.nyquist_rate
        return output
class SpectralFluxAnalysis(Analysis):
    """
    Spectral flux descriptor class for generation of spectral flux audio
    analysis.

    This descriptor calculates the spectral flux for overlapping grains of an
    AnalysedAudioFile object. A full definition of spectral flux analysis can
    be found in the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis

    Raises KeyError if the prerequisite FFT analysis is not present.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(SpectralFluxAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'SpcFlux')
        # Create logger for module
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile
        self.nyquist_rate = self.AnalysedAudioFile.samplerate / 2.
        # Spectral flux is derived from pre-computed FFT frames.
        try:
            fft = self.AnalysedAudioFile.analyses["fft"]
        except KeyError:
            raise KeyError("FFT analysis is required for spectral spread "
                           "analysis.")

        self.analysis_group = analysis_group
        self.logger.info("Creating Spectral Flux analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(
            self.create_spcflux_analysis,
            fft.analysis['frames'],
        )
        self.spcflux_window_count = None

    def hdf5_dataset_formatter(self, analysis_method, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': output, 'times': times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        output = self.create_spcflux_analysis(*args, **kwargs)
        times = self.calc_spcflux_frame_times(output, self.AnalysedAudioFile.frames, samplerate)
        return ({'frames': output, 'times': times}, {})

    @staticmethod
    def create_spcflux_analysis(fft):
        '''
        Calculate the spectral flux of the fft frames.

        Flux is the Euclidean distance between consecutive frames' magnitude
        spectra, normalised by the FFT length. Returns one value per frame
        transition (length N-1 for N frames), or an all-nan array of length N
        when the FFT is entirely zero.
        '''
        fft = fft[:]
        # Get the positive magnitudes of each bin.
        magnitudes = np.abs(fft)
        # Silent input: flux is undefined, mark every frame as nan.
        if not np.nonzero(magnitudes)[0].size:
            y = np.empty(magnitudes.shape[0])
            y.fill(np.nan)
            return y
        # Roll magnitudes as flux is calculated using the difference between
        # consecutive magnitudes. Rolling allows for quick access to previous
        # magnitude.
        rolled_mags = np.roll(magnitudes, 1, axis=0)[1:]
        sum_of_squares = np.sum((magnitudes[1:]-rolled_mags)**2., axis=1)
        spectral_flux = np.sqrt(sum_of_squares) / (np.size(fft, axis=1))

        return spectral_flux

    @staticmethod
    def calc_spcflux_frame_times(spcflux_frames, sample_frame_count, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # Get number of analysis frames
        timebins = spcflux_frames.shape[0]
        # Guard against empty analyses to avoid a zero division below.
        if not timebins:
            return np.array([])
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        spcflux_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        spcflux_times = spcflux_times / samplerate
        return spcflux_times

    def mean_formatter(self, data):
        """Calculate the mean value of the analysis data"""

        values = data[0]

        output = np.empty(len(values))
        for ind, i in enumerate(values):
            mean_i = np.mean(i)
            if mean_i == 0:
                output[ind] = np.nan
            else:
                # Log-scaled and normalised by the Nyquist rate.
                output[ind] = np.log10(np.mean(i))/self.nyquist_rate
        return output

    def median_formatter(self, data):
        """Calculate the median value of the analysis data"""
        values = data[0]

        # Size the output by the number of value groups, not the enclosing
        # tuple (was np.empty(len(data)) — a wrongly-sized array).
        output = np.empty(len(values))
        for ind, i in enumerate(values):
            median_i = np.median(i)
            if median_i == 0:
                output[ind] = np.nan
            else:
                output[ind] = np.log10(np.median(i))/self.nyquist_rate
        return output
class SkewnessAnalysis(Analysis):

    """
    Skewness descriptor class for generation of temporal skewness audio
    analysis.

    This descriptor calculates the temporal skewness for overlapping grains
    of an AnalysedAudioFile object. A full definition of skewness analysis
    can be found in the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis

    Raises KeyError if the prerequisite variance analysis is not present.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(SkewnessAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'skewness')
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile

        if config:
            # Configured window size is in milliseconds; convert to samples.
            self.window_size = config.skewness["window_size"] * self.AnalysedAudioFile.samplerate / 1000
            self.overlap = 1. / config.skewness["overlap"]
        else:
            # Defaults matching RMSAnalysis. Previously these attributes were
            # never set when config was None, raising AttributeError below.
            self.window_size = 512
            self.overlap = 0.5

        # Skewness is normalised by the pre-computed variance analysis.
        try:
            variance = self.AnalysedAudioFile.analyses["variance"]
        except KeyError:
            raise KeyError("Variance analysis is required for skewness "
                           "analysis.")

        self.analysis_group = analysis_group
        self.logger.info("Creating skewness analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(frames, variance.analysis['frames'][:], self.window_size, overlapFac=self.overlap)

    @staticmethod
    def create_skewness_analysis(
        frames,
        variance,
        window_size=512,
        window=signal.hanning,
        overlapFac=0.5
    ):
        """
        Calculate the skewness values of windowed segments of the audio file.

        frames: 1D array of samples, or a callable returning one.
        variance: per-window variance values (same windowing parameters).
        window_size: analysis window length in samples.
        window: window-function generator, or a falsy value for no window.
        overlapFac: fraction of overlap between consecutive windows.

        Returns a 1D array with one skewness value per window.
        """
        # NOTE(review): scipy.signal.hanning was removed in SciPy 1.13;
        # modern SciPy requires scipy.signal.windows.hann instead.
        # Frames may be supplied lazily as a callable.
        if hasattr(frames, '__call__'):
            frames = frames()

        # TODO: filter frequencies below the period of the window
        # (ButterFilter) — previously disabled pending a fix.

        # Array sizes must be ints: np.floor/np.ceil return floats, which
        # modern NumPy rejects for np.zeros and as_strided shapes.
        window_size = int(window_size)
        hopSize = int(window_size - np.floor(overlapFac * window_size))

        # zeros at beginning (thus center of 1st window should be for sample
        # nr. 0)
        samples = np.append(np.zeros(int(np.floor(window_size / 2.0))), frames)

        # cols for windowing
        cols = int(np.ceil((len(samples) - window_size) / float(hopSize)) + 1)
        # zeros at end (thus samples can be fully covered by frames)
        samples = np.append(samples, np.zeros(window_size))

        # Overlapped 2D view of the signal: one analysis window per row.
        frames = stride_tricks.as_strided(
            samples,
            shape=(cols, window_size),
            strides=(samples.strides[0]*hopSize, samples.strides[0])
        ).copy()

        if window:
            win = window(window_size)
            frames *= win

        frame_mean = np.mean(frames, axis=1)

        # The third standardised moment is normalised by sigma cubed.
        variance_cubed = np.sqrt(variance)**3

        a = ((1 / window_size)) * np.sum(((frames-np.vstack(frame_mean))**3), axis=1)
        skewness = a / variance_cubed

        return skewness

    def hdf5_dataset_formatter(self, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': skewness, 'times': skewness_times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        skewness = self.create_skewness_analysis(*args, **kwargs)
        skewness_times = self.calc_skewness_frame_times(skewness, args[0], samplerate)
        return ({'frames': skewness, 'times': skewness_times}, {})

    @staticmethod
    def calc_skewness_frame_times(skewnessframes, sample_frames, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # sample_frames may be supplied lazily as a callable.
        if hasattr(sample_frames, '__call__'):
            sample_frames = sample_frames()
        # Get number of analysis frames
        timebins = skewnessframes.shape[0]
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        skewness_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        skewness_times = skewness_times / samplerate
        return skewness_times
class KurtosisAnalysis(Analysis):

    """
    Kurtosis descriptor class for generation of kurtosis audio analysis.

    This descriptor calculates the temporal kurtosis for overlapping grains
    of an AnalysedAudioFile object. A full definition of kurtosis analysis
    can be found in the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis

    Raises KeyError if the prerequisite variance analysis is not present.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(KurtosisAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'kurtosis')
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile

        if config:
            # Configured window size is in milliseconds; convert to samples.
            self.window_size = config.kurtosis["window_size"] * self.AnalysedAudioFile.samplerate / 1000
            self.overlap = 1. / config.kurtosis["overlap"]
        else:
            # Defaults matching RMSAnalysis. Previously these attributes were
            # never set when config was None, raising AttributeError below.
            self.window_size = 512
            self.overlap = 0.5

        # Kurtosis is normalised by the pre-computed variance analysis.
        try:
            variance = self.AnalysedAudioFile.analyses["variance"]
        except KeyError:
            raise KeyError("Variance analysis is required for Kurtosis "
                           "analysis.")

        self.analysis_group = analysis_group
        self.logger.info("Creating kurtosis analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(frames, variance.analysis['frames'][:], self.window_size, overlapFac=self.overlap)

    @staticmethod
    def create_kurtosis_analysis(
        frames,
        variance,
        window_size=512,
        window=signal.hanning,
        overlapFac=0.5
    ):
        """
        Calculate the kurtosis values of windowed segments of the audio file.

        frames: 1D array of samples, or a callable returning one.
        variance: per-window variance values (same windowing parameters).
        window_size: analysis window length in samples.
        window: window-function generator, or a falsy value for no window.
        overlapFac: fraction of overlap between consecutive windows.

        Returns a 1D array of excess kurtosis values, one per window.
        """
        # NOTE(review): scipy.signal.hanning was removed in SciPy 1.13;
        # modern SciPy requires scipy.signal.windows.hann instead.
        # Frames may be supplied lazily as a callable.
        if hasattr(frames, '__call__'):
            frames = frames()

        # TODO: filter frequencies below the period of the window
        # (ButterFilter) — previously disabled pending a fix.

        # Array sizes must be ints: np.floor/np.ceil return floats, which
        # modern NumPy rejects for np.zeros and as_strided shapes.
        window_size = int(window_size)
        hopSize = int(window_size - np.floor(overlapFac * window_size))

        # zeros at beginning (thus center of 1st window should be for sample
        # nr. 0)
        samples = np.append(np.zeros(int(np.floor(window_size / 2.0))), frames)

        # cols for windowing
        cols = int(np.ceil((len(samples) - window_size) / float(hopSize)) + 1)
        # zeros at end (thus samples can be fully covered by frames)
        samples = np.append(samples, np.zeros(window_size))

        # Overlapped 2D view of the signal: one analysis window per row.
        frames = stride_tricks.as_strided(
            samples,
            shape=(cols, window_size),
            strides=(samples.strides[0]*hopSize, samples.strides[0])
        ).copy()

        if window:
            win = window(window_size)
            frames *= win

        frame_mean = np.mean(frames, axis=1)

        # The fourth standardised moment is normalised by variance squared.
        variance_sqrd = variance**2

        a = ((1 / window_size)) * np.sum(((frames-np.vstack(frame_mean))**4), axis=1)
        kurtosis = a / variance_sqrd
        # Subtract 3 so a Gaussian signal scores 0 (excess kurtosis).
        kurtosis -= 3

        return kurtosis

    def hdf5_dataset_formatter(self, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': kurtosis, 'times': kurtosis_times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        kurtosis = self.create_kurtosis_analysis(*args, **kwargs)
        kurtosis_times = self.calc_kurtosis_frame_times(kurtosis, args[0], samplerate)
        return ({'frames': kurtosis, 'times': kurtosis_times}, {})

    @staticmethod
    def calc_kurtosis_frame_times(kurtosisframes, sample_frames, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # sample_frames may be supplied lazily as a callable.
        if hasattr(sample_frames, '__call__'):
            sample_frames = sample_frames()
        # Get number of analysis frames
        timebins = kurtosisframes.shape[0]
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        kurtosis_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        kurtosis_times = kurtosis_times / samplerate
        return kurtosis_times
class RMSAnalysis(Analysis):

    """
    RMS descriptor class for generation of RMS audio analysis.

    This descriptor calculates the Root Mean Square analysis for overlapping
    grains of an AnalysedAudioFile object. A full definition of RMS analysis
    can be found in the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: The configuration module used to configure the analysis
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(RMSAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'RMS')
        self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile

        if config:
            # Configured window size is in milliseconds; convert to samples.
            self.window_size = config.rms["window_size"] * self.AnalysedAudioFile.samplerate / 1000
            self.overlap = 1. / config.rms["overlap"]
        else:
            self.window_size = 512
            self.overlap = 0.5

        self.analysis_group = analysis_group
        self.logger.info("Creating RMS analysis for {0}".format(self.AnalysedAudioFile.name))
        self.create_analysis(frames, self.AnalysedAudioFile.samplerate, window_size=self.window_size, overlapFac=self.overlap, )

    @staticmethod
    def create_rms_analysis(
        frames,
        samplerate,
        window_size=512,
        window=signal.hanning,
        overlapFac=0.5
    ):
        """
        Generate RMS contour analysis.

        Calculate the RMS values of windowed segments of the audio file.

        frames: 1D array of samples, or a callable returning one.
        samplerate: sample rate of the audio, used to design the filter.
        window_size: analysis window length in samples.
        window: window-function generator, or a falsy value for no window.
        overlapFac: fraction of overlap between consecutive windows.

        Returns a 1D array with one RMS value per window.
        """
        # NOTE(review): scipy.signal.hanning was removed in SciPy 1.13;
        # modern SciPy requires scipy.signal.windows.hann instead.
        # Frames may be supplied lazily as a callable.
        if hasattr(frames, '__call__'):
            frames = frames()

        # Renamed from "butter_lowpass": these helpers design/apply a
        # HIGHPASS (btype='highpass') to remove content below the window
        # period; the old names were misleading. Behaviour is unchanged.
        # Adapted from http://stackoverflow.com/questions/25191620/creating-lowpass-filter-in-scipy-understanding-methods-and-units
        def _butter_highpass(cutoff, fs, order=5):
            nyq = 0.5 * fs
            normal_cutoff = cutoff / nyq
            b, a = butter(order, normal_cutoff, btype='highpass', analog=False)
            return b, a

        def _butter_highpass_filter(data, cutoff, fs, order=5):
            b, a = _butter_highpass(cutoff, fs, order=order)
            return lfilter(b, a, data)

        # Remove frequencies below the period of the window (in Hz).
        lowest_freq = 1.0 / (window_size / samplerate)
        frames = _butter_highpass_filter(frames, lowest_freq, samplerate)

        # Array sizes must be ints: np.floor/np.ceil return floats, which
        # modern NumPy rejects for np.zeros and as_strided shapes.
        window_size = int(window_size)
        hopSize = int(window_size - np.floor(overlapFac * window_size))

        # zeros at beginning (thus center of 1st window should be for sample
        # nr. 0)
        samples = np.append(np.zeros(int(np.floor(window_size / 2.0))), frames)

        # cols for windowing
        cols = int(np.ceil((len(samples) - window_size) / float(hopSize)) + 1)
        # zeros at end (thus samples can be fully covered by frames)
        samples = np.append(samples, np.zeros(window_size))

        # Overlapped 2D view of the signal: one analysis window per row.
        frames = stride_tricks.as_strided(
            samples,
            shape=(cols, window_size),
            strides=(samples.strides[0]*hopSize, samples.strides[0])
        ).copy()

        if window:
            win = window(window_size)
            frames *= win
        # Root of the mean of squared sample values per window.
        rms = np.sqrt(np.mean(np.square(np.abs(frames)), axis=1))

        return rms

    def hdf5_dataset_formatter(self, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Returns ({'frames': rms, 'times': rms_times}, {}).
        '''
        samplerate = self.AnalysedAudioFile.samplerate
        rms = self.create_rms_analysis(*args, **kwargs)
        rms_times = self.calc_rms_frame_times(rms, args[0], samplerate)
        return ({'frames': rms, 'times': rms_times}, {})

    @staticmethod
    def calc_rms_frame_times(rmsframes, sample_frames, samplerate):

        """Calculate times for frames using sample size and samplerate."""

        # sample_frames may be supplied lazily as a callable.
        if hasattr(sample_frames, '__call__'):
            sample_frames = sample_frames()
        # Get number of analysis frames
        timebins = rmsframes.shape[0]
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins+1)
        # divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        rms_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
        # Divide by the samplerate to give times in seconds
        rms_times = rms_times / samplerate
        return rms_times
134 | rms_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float) 135 | # Divide by the samplerate to give times in seconds 136 | rms_times = rms_times / samplerate 137 | return rms_times 138 | -------------------------------------------------------------------------------- /src/sppysound/Examples/Database Matching Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 15, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sppysound.database import AudioDatabase, Matcher\n", 12 | "import matching_config" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 16, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "source_dir = \"./ExampleDatabase\"\n", 24 | "target_dir = \"./ExampleTarget\"\n", 25 | "output_dir = \"./ExampleOutput\"\n", 26 | "analysis_list = [\"rms\", \"f0\"]" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Load source and target databases for matching..." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 10, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "source_database = AudioDatabase(source_dir, analysis_list=analysis_list, config=matching_config)\n", 45 | "source_database.load_database(reanalyse=False)\n", 46 | "target_database = AudioDatabase(target_dir, analysis_list=analysis_list, config=matching_config)\n", 47 | "target_database.load_database(reanalyse=False)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "An output database must also be defined. 
This is to store matching results and synthesis results generated later.\n", 55 | "Note that an analysis list was not defined for this as it will not be analysed" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 11, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "output_database = AudioDatabase(output_dir, config=matching_config)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "The database must still be loaded to check for previous HDF5 files to use for results" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 12, 79 | "metadata": { 80 | "collapsed": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "output_database.load_database(reanalyse=False)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "A matcher object is then created using the loaded databases, ready to perform matching. The rematch argument can be set to discard any previously found matches from pre-existing HDF5 files, otherwise previously found matches will cause the program to terminate for their preservation." 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 13, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "matcher = Matcher(\n", 103 | " source_database,\n", 104 | " target_database,\n", 105 | " output_db=output_database,\n", 106 | " config=matching_config,\n", 107 | " rematch=True\n", 108 | ")" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "The matching is then run using the brute force matcher method. Other methods are not currently available.\n", 116 | "\n", 117 | "Warnings may be produced during this process. These will be silenced in a future revision but do not affect results." 
118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 14, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "matcher.match(\n", 129 | " matcher.brute_force_matcher,\n", 130 | ")" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "The output database will now contain a HDF5 file containing matching data for the two databases. This can be used to synthesize results." 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "The matching_config file for this demo is:\n", 145 | "~~~python\n", 146 | "rms = {\n", 147 | " \"window_size\": 100,\n", 148 | " \"overlap\": 2,\n", 149 | "}\n", 150 | "\n", 151 | "analysis_dict = {\n", 152 | " \"f0\": \"log2_median\",\n", 153 | " \"rms\": \"mean\"\n", 154 | "}\n", 155 | "\n", 156 | "matcher_weightings = {\n", 157 | " \"f0\" : 1.,\n", 158 | " \"rms\": 1.\n", 159 | "}\n", 160 | "\n", 161 | "analysis = {\n", 162 | " \"reanalyse\": False\n", 163 | "}\n", 164 | "\n", 165 | "matcher = {\n", 166 | " \"rematch\": False,\n", 167 | " \"grain_size\": 100,\n", 168 | " \"overlap\": 2,\n", 169 | " # Defines the number of matches to keep for synthesis.\n", 170 | " \"match_quantity\": 20\n", 171 | "}\n", 172 | "\n", 173 | "output_file = {\n", 174 | " \"samplerate\": 44100,\n", 175 | " \"format\": 131075,\n", 176 | " \"channels\": 1\n", 177 | "}\n", 178 | "~~~" 179 | ] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "Python 2", 185 | "language": "python", 186 | "name": "python2" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 2 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython2", 198 | "version": "2.7.10" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 0 203 | } 
204 | -------------------------------------------------------------------------------- /src/sppysound/analysis/SpectralSpreadAnalysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | import logging 4 | import pdb 5 | 6 | from Analysis import Analysis 7 | 8 | class SpectralSpreadAnalysis(Analysis): 9 | """ 10 | Spectral spread descriptor class for generation of spectral spread audio 11 | analysis. 12 | 13 | This descriptor calculates the spectral spread for overlapping grains of an 14 | AnalysedAudioFile object. A full definition of spectral spread analysis can 15 | be found in the documentation. 16 | 17 | Arguments: 18 | 19 | - analysis_group: the HDF5 file group to use for the storage of the 20 | analysis. 21 | 22 | - config: The configuration module used to configure the analysis 23 | """ 24 | 25 | def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None): 26 | super(SpectralSpreadAnalysis, self).__init__(AnalysedAudioFile,frames, analysis_group, 'SpcSprd') 27 | self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name)) 28 | # Store reference to the file to be analysed 29 | self.AnalysedAudioFile = AnalysedAudioFile 30 | self.nyquist_rate = self.AnalysedAudioFile.samplerate / 2. 
31 | try: 32 | spccntr = self.AnalysedAudioFile.analyses["spccntr"] 33 | except KeyError: 34 | raise KeyError("Spectral Centroid analysis is required for " 35 | "spectral spread analysis.") 36 | try: 37 | fft = self.AnalysedAudioFile.analyses["fft"] 38 | except KeyError: 39 | raise KeyError("FFT analysis is required for spectral spread " 40 | "analysis.") 41 | 42 | self.analysis_group = analysis_group 43 | self.logger.info("Creating Spectral Spread analysis for {0}".format(self.AnalysedAudioFile.name)) 44 | self.create_analysis( 45 | fft.analysis['frames'], 46 | spccntr.analysis['frames'], 47 | self.AnalysedAudioFile.samplerate 48 | ) 49 | self.spccntr_window_count = None 50 | 51 | def hdf5_dataset_formatter(self, *args, **kwargs): 52 | ''' 53 | Formats the output from the analysis method to save to the HDF5 file. 54 | ''' 55 | samplerate = self.AnalysedAudioFile.samplerate 56 | output = self.create_spcsprd_analysis(*args, **kwargs) 57 | times = self.calc_spcsprd_frame_times(output, self.AnalysedAudioFile.frames, samplerate) 58 | return ({'frames': output, 'times': times}, {}) 59 | 60 | @staticmethod 61 | def create_spcsprd_analysis(fft, spectral_centroid, samplerate, output_format = "ind"): 62 | ''' 63 | Calculate the spectral spread of the fft frames. 64 | 65 | fft: Real fft frames. 66 | spectral_centroid: spectral centroid frames (in index format). 67 | length: the length of the window used to calculate the FFT. 68 | samplerate: the samplerate of the audio analysed. 69 | ''' 70 | fft = fft[:] 71 | spectral_centroid = spectral_centroid[:] 72 | # Get the positive magnitudes of each bin. 
73 | magnitudes = np.abs(fft) 74 | mag_max = np.max(magnitudes) 75 | if not mag_max: 76 | y = np.empty(magnitudes.shape[0]) 77 | y.fill(np.nan) 78 | return y 79 | # Get the index for each bin 80 | if output_format == "ind": 81 | freqs = np.arange(np.size(fft, axis=1)) 82 | elif output_format == "freq": 83 | freqs = np.fft.rfftfreq((np.size(fft, axis=1)*2)-1, 1.0/samplerate) 84 | else: 85 | raise ValueError("\'{0}\' is not a valid output " 86 | "format.".format(output_format)) 87 | 88 | spectral_centroid = np.vstack(spectral_centroid) 89 | 90 | a = (freqs-spectral_centroid)**2 91 | mag_sqrd = magnitudes**2 92 | # Calculate the weighted mean 93 | y = np.sqrt(np.sum(a*mag_sqrd, axis=1) / (np.sum(mag_sqrd, axis=1))) 94 | 95 | return y 96 | 97 | @staticmethod 98 | def calc_spcsprd_frame_times(spcsprd_frames, sample_frame_count, samplerate): 99 | 100 | """Calculate times for frames using sample size and samplerate.""" 101 | 102 | # Get number of frames for time and frequency 103 | timebins = spcsprd_frames.shape[0] 104 | # Create array ranging from 0 to number of time frames 105 | scale = np.arange(timebins+1) 106 | # divide the number of samples by the total number of frames, then 107 | # multiply by the frame numbers. 
108 | spcsprd_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float) 109 | # Divide by the samplerate to give times in seconds 110 | spcsprd_times = spcsprd_times / samplerate 111 | return spcsprd_times 112 | 113 | def mean_formatter(self, data): 114 | """Calculate the mean value of the analysis data""" 115 | 116 | values = data[0] 117 | 118 | output = np.empty(len(values)) 119 | for ind, i in enumerate(values): 120 | mean_i = np.mean(i) 121 | if mean_i == 0: 122 | output[ind] = np.nan 123 | else: 124 | output[ind] = np.log10(np.mean(i))/self.nyquist_rate 125 | return output 126 | 127 | def median_formatter(self, data): 128 | """Calculate the median value of the analysis data""" 129 | values = data[0] 130 | 131 | output = np.empty(len(data)) 132 | for ind, i in enumerate(values): 133 | median_i = np.median(i) 134 | if median_i == 0: 135 | output[ind] = np.nan 136 | else: 137 | output[ind] = np.log10(np.median(i))/self.nyquist_rate 138 | return output 139 | -------------------------------------------------------------------------------- /src/sppysound/docs/DatabaseMatchingExample.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Matching a target sample to a database" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "from sppysound.database import AudioDatabase, Matcher\n", 19 | "import matching_config" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "source_dir = \"./ExampleDatabase\"\n", 31 | "target_dir = \"./ExampleTarget\"\n", 32 | "output_dir = \"./ExampleOutput\"\n", 33 | "analysis_list = [\"rms\", \"f0\"]" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | 
"metadata": {}, 39 | "source": [ 40 | "Load source and target databases for matching..." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "source_database = AudioDatabase(\n", 52 | " source_dir, \n", 53 | " analysis_list=analysis_list, \n", 54 | " config=matching_config\n", 55 | ")\n", 56 | "source_database.load_database(reanalyse=True)\n", 57 | "target_database = AudioDatabase(\n", 58 | " target_dir, \n", 59 | " analysis_list=analysis_list, \n", 60 | " config=matching_config\n", 61 | ")\n", 62 | "target_database.load_database(reanalyse=True)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "An output database must also be defined. This is to store matching results and synthesis results generated later.\n", 70 | "Note that an analysis list was not defined for this as it will not be analysed" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 4, 76 | "metadata": { 77 | "collapsed": false 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "output_database = AudioDatabase(output_dir, config=matching_config)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "The database must still be loaded to check for previous HDF5 files to use for results" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 5, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "output_database.load_database(reanalyse=False)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "A matcher object is then created using the loaded databases, ready to perform matching. The rematch argument can be set to discard any previously found matches from pre-existing HDF5 files, otherwise previously found matches will cause the program to terminate for their preservation." 
107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 6, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "matcher = Matcher(\n", 118 | " source_database,\n", 119 | " target_database,\n", 120 | " output_db=output_database,\n", 121 | " config=matching_config,\n", 122 | " rematch=True\n", 123 | ")" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "The matching is then run using the brute force matcher method. Other methods are not currently available.\n", 131 | "\n", 132 | "Warnings may be produced during this process. These will be silenced in a future revision but do not affect results." 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 7, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "matcher.match(\n", 144 | " matcher.kdtree_matcher,\n", 145 | ")" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "The output database will now contain a HDF5 file containing matching data for the two databases. This can be used to synthesize results." 
153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "The matching_config file for this demo is:\n", 160 | "\n", 161 | "~~~python\n", 162 | "rms = {\n", 163 | " \"window_size\": 100,\n", 164 | " \"overlap\": 2,\n", 165 | "}\n", 166 | "\n", 167 | "analysis_dict = {\n", 168 | " \"f0\": \"log2_median\",\n", 169 | " \"rms\": \"mean\"\n", 170 | "}\n", 171 | "\n", 172 | "matcher_weightings = {\n", 173 | " \"f0\" : 1.,\n", 174 | " \"rms\": 1.\n", 175 | "}\n", 176 | "\n", 177 | "analysis = {\n", 178 | " \"reanalyse\": False\n", 179 | "}\n", 180 | "\n", 181 | "matcher = {\n", 182 | " \"rematch\": False,\n", 183 | " \"grain_size\": 100,\n", 184 | " \"overlap\": 2,\n", 185 | " # Defines the number of matches to keep for synthesis.\n", 186 | " \"match_quantity\": 20\n", 187 | "}\n", 188 | "\n", 189 | "output_file = {\n", 190 | " \"samplerate\": 44100,\n", 191 | " \"format\": 131075,\n", 192 | " \"channels\": 1\n", 193 | "}\n", 194 | "~~~" 195 | ] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 2", 201 | "language": "python", 202 | "name": "python2" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 2 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython2", 214 | "version": "2.7.11" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 0 219 | } 220 | -------------------------------------------------------------------------------- /src/sppysound/Examples/Match Synthesis Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sppysound.database import AudioDatabase, Synthesizer\n", 12 | "import 
synthesis_config" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "source_dir = \"./ExampleDatabase\"\n", 24 | "target_dir = \"./ExampleTarget\"\n", 25 | "output_dir = \"./ExampleOutput\"" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "Load source database.\n", 33 | "Also load the F0, RMS and Peak analyses for use with amplitude and pitch enforcement." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "source_database = AudioDatabase(\n", 45 | " source_dir,\n", 46 | " config=synthesis_config,\n", 47 | " analysis_list={\"f0\", \"rms\", \"peak\"}\n", 48 | ")\n", 49 | "source_database.load_database(reanalyse=False)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "Load database used to generate matches to source database. \n", 57 | "This is used when enforcing analyses such as RMS and F0. (Original grains are needed to calculate the ratio to alter the synthesized grain by)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "target_database = AudioDatabase(\n", 69 | " target_dir,\n", 70 | " config=synthesis_config,\n", 71 | " analysis_list={\"f0\", \"rms\", \"peak\"}\n", 72 | ")\n", 73 | "target_database.load_database(reanalyse=False)\n", 74 | "\n", 75 | "output_database = AudioDatabase(\n", 76 | " output_dir,\n", 77 | " config=synthesis_config\n", 78 | ")\n", 79 | "output_database.load_database(reanalyse=False)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Initialise the synthesizer object used for generating the final output." 
87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "synthesizer = Synthesizer(source_database, output_database, target_db=target_database, config=synthesis_config)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "Run synthesis. As with the matching, warnings may be generated. These have all been accounted for and will be silenced in a future release. The output audio can now be found in the audio folder of ./ExampleOutput" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "metadata": { 111 | "collapsed": false, 112 | "scrolled": true 113 | }, 114 | "outputs": [ 115 | { 116 | "name": "stderr", 117 | "output_type": "stream", 118 | "text": [ 119 | "/Users/sam/PerryPerrySource/pysource/pysound/src/sppysound/audiofile.py:665: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.\n", 120 | " if self.times == None:\n", 121 | "/Users/sam/PerryPerrySource/pysource/pysound/src/sppysound/audiofile.py:297: UserWarning: write_frames::warning::audio data has been clipped while writing to file ./.shift_input.wav.\n", 122 | " return self.pysndfile_object.write_frames(input)\n", 123 | "/Users/sam/.pyenv/versions/2.7.10/lib/python2.7/site-packages/numpy/core/_methods.py:59: RuntimeWarning: Mean of empty slice.\n", 124 | " warnings.warn(\"Mean of empty slice.\", RuntimeWarning)\n", 125 | "/Users/sam/.pyenv/versions/2.7.10/lib/python2.7/site-packages/numpy/core/_methods.py:71: RuntimeWarning: invalid value encountered in double_scalars\n", 126 | " ret = ret.dtype.type(ret / rcount)\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "synthesizer.synthesize()" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "The synthesis_config.py file for this demo is:\n", 139 | "~~~python\n", 140 | 
"rms = {\n", 141 | " \"window_size\": 100,\n", 142 | " \"overlap\": 2,\n", 143 | "}\n", 144 | "\n", 145 | "analysis_dict = {\n", 146 | " \"f0\": \"log2_median\",\n", 147 | " \"rms\": \"mean\"\n", 148 | "}\n", 149 | "\n", 150 | "analysis = {\n", 151 | " \"reanalyse\": False\n", 152 | "}\n", 153 | "\n", 154 | "output_file = {\n", 155 | " \"samplerate\": 44100,\n", 156 | " \"format\": 131075,\n", 157 | " \"channels\": 1\n", 158 | "}\n", 159 | "\n", 160 | "synthesizer = {\n", 161 | " \"enforce_rms\": True,\n", 162 | " \"enf_rms_ratio_limit\": 5.,\n", 163 | " \"enforce_f0\": True,\n", 164 | " \"enf_f0_ratio_limit\": 10.,\n", 165 | " \"grain_size\": 100,\n", 166 | " \"overlap\": 2,\n", 167 | " \"normalize\" : True,\n", 168 | " # Defines the number of potential grains to choose from matches when\n", 169 | " # synthesizing output.\n", 170 | " \"match_quantity\": 20\n", 171 | "}\n", 172 | "~~~" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "collapsed": true 180 | }, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "Python 2", 188 | "language": "python", 189 | "name": "python2" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 2 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython2", 201 | "version": "2.7.10" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 0 206 | } 207 | -------------------------------------------------------------------------------- /src/sppysound/docs/MatchSynthesisExample.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Synthesizing output from matches" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | 
"execution_count": 4, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "from sppysound.database import AudioDatabase, Synthesizer, Matcher\n", 19 | "import synthesis_config\n", 20 | "import config" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 5, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "source_dir = \"./ExampleDatabase\"\n", 32 | "target_dir = \"./ExampleTarget\"\n", 33 | "output_dir = \"./ExampleOutput\"" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "Load source database.\n", 41 | "Also load the F0, RMS and Peak analyses for use with amplitude and pitch enforcement." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 6, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "source_database = AudioDatabase(\n", 53 | " source_dir,\n", 54 | " config=synthesis_config,\n", 55 | " analysis_list={\"f0\", \"rms\"}\n", 56 | ")\n", 57 | "source_database.load_database(reanalyse=True)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "Load database used to generate matches to source database. \n", 65 | "This is used when enforcing analyses such as RMS and F0. 
(Original grains are needed to calculate the ratio to alter the synthesized grain by)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 7, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "Traceback (most recent call last):\n", 80 | " File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/database.py\", line 157, in analyse_database\n", 81 | " config=self.config\n", 82 | " File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/audiofile.py\", line 943, in __enter__\n", 83 | " \"empty\".format(self.name))\n", 84 | "IOError: File isn't valid: ElectricGuitarSample-out_output.wav\n", 85 | "Check that file is mono and isn't empty\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "target_database = AudioDatabase(\n", 91 | " target_dir,\n", 92 | " config=synthesis_config,\n", 93 | " analysis_list={\"f0\", \"rms\"}\n", 94 | ")\n", 95 | "target_database.load_database(reanalyse=True)\n", 96 | "\n", 97 | "output_database = AudioDatabase(\n", 98 | " output_dir,\n", 99 | " config=config\n", 100 | ")\n", 101 | "output_database.load_database(reanalyse=False)\n", 102 | "\n", 103 | "matcher = Matcher(\n", 104 | " source_database,\n", 105 | " target_database,\n", 106 | " output_db=output_database,\n", 107 | " config=config,\n", 108 | " rematch=True\n", 109 | ")\n", 110 | "matcher.match(\n", 111 | " matcher.kdtree_matcher,\n", 112 | " grain_size=config.matcher[\"grain_size\"],\n", 113 | " overlap=config.matcher[\"overlap\"]\n", 114 | ")" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "Initialise the synthesizer object used for generating the final output." 
122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "synthesizer = Synthesizer(\n", 133 | " source_database, \n", 134 | " output_database, \n", 135 | " target_db=target_database, \n", 136 | " config=config\n", 137 | ")" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Run synthesis. As with the matching, warnings may be generated. These have all been accounted for and will be silenced in a future release. The output audio can now be found in the audio folder of ./ExampleOutput" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 9, 150 | "metadata": { 151 | "collapsed": false, 152 | "scrolled": true 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "synthesizer.synthesize(\n", 157 | " grain_size=config.synthesizer[\"grain_size\"],\n", 158 | " overlap=config.synthesizer[\"overlap\"]\n", 159 | ")" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "The synthesis_config.py file for this demo is:\n", 167 | "\n", 168 | "~~~python\n", 169 | "rms = {\n", 170 | " \"window_size\": 100,\n", 171 | " \"overlap\": 2,\n", 172 | "}\n", 173 | "\n", 174 | "analysis_dict = {\n", 175 | " \"f0\": \"log2_median\",\n", 176 | " \"rms\": \"mean\"\n", 177 | "}\n", 178 | "\n", 179 | "analysis = {\n", 180 | " \"reanalyse\": False\n", 181 | "}\n", 182 | "\n", 183 | "output_file = {\n", 184 | " \"samplerate\": 44100,\n", 185 | " \"format\": 131075,\n", 186 | " \"channels\": 1\n", 187 | "}\n", 188 | "\n", 189 | "synthesizer = {\n", 190 | " \"enforce_rms\": True,\n", 191 | " \"enf_rms_ratio_limit\": 5.,\n", 192 | " \"enforce_f0\": True,\n", 193 | " \"enf_f0_ratio_limit\": 10.,\n", 194 | " \"grain_size\": 100,\n", 195 | " \"overlap\": 2,\n", 196 | " \"normalize\" : True,\n", 197 | " # Defines the number of potential grains to choose from 
matches when\n", 198 | " # synthesizing output.\n", 199 | " \"match_quantity\": 20\n", 200 | "}\n", 201 | "~~~" 202 | ] 203 | } 204 | ], 205 | "metadata": { 206 | "kernelspec": { 207 | "display_name": "Python 2", 208 | "language": "python", 209 | "name": "python2" 210 | }, 211 | "language_info": { 212 | "codemirror_mode": { 213 | "name": "ipython", 214 | "version": 2 215 | }, 216 | "file_extension": ".py", 217 | "mimetype": "text/x-python", 218 | "name": "python", 219 | "nbconvert_exporter": "python", 220 | "pygments_lexer": "ipython2", 221 | "version": "2.7.11" 222 | } 223 | }, 224 | "nbformat": 4, 225 | "nbformat_minor": 0 226 | } 227 | -------------------------------------------------------------------------------- /src/sppysound/synthesis/synthesis_tools.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | from sppysound import AudioFile 4 | import matplotlib.pyplot as plt 5 | import pdb 6 | import scipy 7 | 8 | 9 | def convolve(input, impulse_response): 10 | out = np.zeros(len(input) + len(impulse_response) - 1) 11 | for input_ind, i in enumerate(input): 12 | for imp_ind, j in enumerate(impulse_response): 13 | out[input_ind+imp_ind] = out[input_ind+imp_ind] + i*j 14 | return out 15 | 16 | def moving_average_filter_recursive(input, M, symetry = 'after'): 17 | ''' 18 | Applies a moving average filter to the input. 19 | 20 | Arguments: 21 | input - the input signal to filter. 22 | symetry - ('before' or 'middle') defines how points will be 23 | averaged around the index 24 | M - the number of coefficients. 25 | ''' 26 | # Calculate the filter coefficients 27 | filter_kernal = np.ones(M) / M 28 | # Get the pre-zero-padded input size. 29 | input_size = input.size 30 | # Pad end of input with zeros. 
31 | if symetry == 'after': 32 | # Zero-pad input at end on input for averaging of end samples 33 | input = np.hstack((input, np.zeros(M))) 34 | elif symetry == 'middle': 35 | # M value must be odd to have an equal number of samples on each side. 36 | if not M % 2: 37 | raise ValueError("M must be odd for symetrical averaging") 38 | # Calculate the zero padding size. 39 | offset = np.floor(M/2.0) 40 | # Zero pad input on both sides to allow for averaging from first sample 41 | # to last sample 42 | input = np.hstack((np.zeros(offset), input, np.zeros(offset))) 43 | 44 | 45 | # Calculate the number of output samples. 46 | # y = np.zeros(input.size-M) 47 | 48 | y = np.zeros(input.size-M) 49 | # If averaging after first sample. 50 | if symetry == 'after': 51 | # For each sample in the input 52 | acc = 0 53 | 54 | i = 0 55 | while i < M: 56 | acc += input[i] 57 | i += 1 58 | y[0] = acc / M 59 | 60 | i = 1 61 | while i < input.size-M: 62 | acc += input[i+M-1] - input[i-1] 63 | y[i] = acc/M 64 | i += 1 65 | print(y) 66 | 67 | elif symetry == 'middle': 68 | # TODO: Make recursive 69 | i = 0 70 | # For all the input samples 71 | while i < input_size-offset: 72 | # The output sample is the average sample value for M samples. 73 | y[i] = np.sum(input[i:i+M] * filter_kernal) 74 | i += 1 75 | return y 76 | 77 | def moving_average_filter(input, M, symetry = 'after'): 78 | ''' 79 | Applies a moving average filter to the input. 80 | 81 | Arguments: 82 | input - the input signal to filter. 83 | symetry - ('before' or 'middle') defines how points will be 84 | averaged around the index 85 | M - the number of coefficients. 86 | ''' 87 | # Calculate the filter coefficients 88 | filter_kernal = np.ones(M) / M 89 | # Get the pre-zero-padded input size. 90 | input_size = input.size 91 | # Pad end of input with zeros. 
92 | if symetry == 'after': 93 | # Zero-pad input at end on input for averaging of end samples 94 | input = np.hstack((input, np.zeros(M))) 95 | elif symetry == 'middle': 96 | # M value must be odd to have an equal number of samples on each side. 97 | if not M % 2: 98 | raise ValueError("M must be odd for symetrical averaging") 99 | # Calculate the zero padding size. 100 | offset = np.floor(M/2.0) 101 | # Zero pad input on both sides to allow for averaging from first sample 102 | # to last sample 103 | input = np.hstack((np.zeros(offset), input, np.zeros(offset))) 104 | 105 | 106 | # Calculate the number of output samples. 107 | y = np.zeros(input.size-M) 108 | 109 | # If averaging after first sample. 110 | if symetry == 'after': 111 | i = 0 112 | # For each sample in the input 113 | while i < input_size: 114 | y[i] = np.sum(input[i:i+M] / M) 115 | i += 1 116 | # If averaging symetrically 117 | elif symetry == 'middle': 118 | i = 0 119 | # For all the input samples 120 | while i < input_size-offset: 121 | # The output sample is the average sample value for M samples. 122 | y[i] = np.sum(input[i:i+M] / M) 123 | i += 1 124 | return y 125 | 126 | def blackman_filter(input, window_size, freq): 127 | ''' 128 | Create a blackman windowed-sinc filter. 129 | 130 | freq - The cutoff frequency of the filter specified as a proportion of the 131 | samplerate of the signal. 132 | ''' 133 | # TODO: Check the definition of freq is correct. 134 | 135 | i = np.arange(window_size) 136 | # Create a sinc function of M length. 137 | # The output will be a sinc function shifted from -M/2 - M/2 to 0 - M. 138 | # This will result in a sinc function that can be used to create a filter 139 | # at the cutoff-frequency provided in freq. 
140 | sinc_kernal = np.sin(2*np.pi*freq*(i-window_size/2))/(i-window_size/2) 141 | 142 | # Create a blackman window 143 | window = 0.42 - 0.5 * np.cos(2 * np.pi * (i / window_size)) + 0.08 * np.cos(4 * np.pi * (i / window_size)) 144 | window_sinc = sinc_kernal * window 145 | 146 | # Number of samplepoints 147 | N = window_size 148 | # sample spacing 149 | T = 1.0 / 800.0 150 | yf = scipy.fftpack.fft(window_sinc) 151 | xf = np.linspace(0.0, 1.0/(2.0*T), N/2) 152 | 153 | 154 | plt.subplot(311) 155 | plt.title('Blackman Window') 156 | plt.plot(window) 157 | plt.ylabel('Amplitude') 158 | plt.xlabel('sample') 159 | plt.subplot(312) 160 | plt.title('Window sinc function') 161 | plt.plot(sinc_kernal) 162 | plt.subplot(313) 163 | plt.title('FFT') 164 | plt.plot(xf, 2.0/N * np.abs(yf[0:N/2])) 165 | plt.show() 166 | 167 | if __name__ == "__main__": 168 | ''' 169 | a = np.array([1, 0.5, 3, 1]) 170 | b = np.array([1, 0, 0, 0]) 171 | c = convolve(a, b) 172 | print(c) 173 | print(np.convolve(a, b)) 174 | ''' 175 | with AudioFile('./test_audio.aif', 'r') as test_audio: 176 | grain = test_audio.read_grain(0, -1) 177 | grain = np.arange(5000) 178 | filtered_grain = moving_average_filter(grain, 101) 179 | filtered_r_grain = moving_average_filter_recursive(grain, 101) 180 | 181 | blackman_filter(grain, 101, 0.14) 182 | 183 | ''' 184 | # Plot test wave 185 | plt.subplot(211) 186 | plt.title('Original Wave') 187 | plt.plot(grain) 188 | plt.ylabel('Amplitude') 189 | plt.xlabel('sample') 190 | plt.subplot(212) 191 | plt.title('Filtered Wave') 192 | plt.plot(filtered_grain) 193 | plt.ylabel('Amplitude') 194 | plt.xlabel('sample') 195 | plt.show() 196 | ''' 197 | -------------------------------------------------------------------------------- /src/sppysound/analysis/Analysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import numpy as np 4 | import logging 5 | import pdb 6 | 7 | from fileops 
class Analysis(object):

    """
    Basic descriptor class to build analyses on.

    The Analysis base class works as an interface between child descriptor
    objects and the HDF5 storage file. This is designed to separate descriptor
    generation from data IO, allowing for quick development of new descriptor
    classes. The base Analysis class has methods for retrieving analyses from
    file and saving data created by analysis objects to file. It also has basic
    formatting methods used to return data in the required format for processes
    such as descriptor comparisons.

    In order to create a new descriptor, the hdf5_dataset_formatter method will
    need to be overwritten by the child class to generate and store the
    descriptor's output in the appropriate manner. Examples of this can be seen
    through the currently implemented descriptors.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, name, config=None):
        """
        AnalysedAudioFile: audio file object to be analysed.
        frames: audio sample frames (used by sub-classes that need them).
        analysis_group: HDF5 group that analysis datasets are written to.
        name: name of this analysis; used as the HDF5 sub-group name.
        config: optional configuration object (unused by the base class).
        """
        # Create object logger
        self.logger = logging.getLogger(__name__ + '.{0}Analysis'.format(name))
        # Store AnalysedAudioFile object to be analysed.
        self.AnalysedAudioFile = AnalysedAudioFile
        self.analysis_group = analysis_group
        self.name = name

    def create_analysis(self, *args, **kwargs):
        """
        Create the analysis and save to the HDF5 file.

        Arguments are forwarded to hdf5_dataset_formatter, which runs the
        analysis and returns (data_dict, attrs_dict) for storage.
        """
        try:
            self.analysis = self.analysis_group.create_group(self.name)
        except ValueError:
            self.logger.info("{0} analysis group already exists".format(self.name))
            self.analysis = self.analysis_group[self.name]

        # If forcing new analysis creation then delete old analysis and create
        # a new one
        if self.AnalysedAudioFile.force_analysis:
            self.logger.info("Force re-analysis is enabled. "
                             "deleting: {0}".format(self.analysis.name))
            # Delete all pre-existing data in database. Snapshot the key list
            # first: the original iterated `iterkeys()` while deleting, which
            # mutates the group during iteration.
            for i in list(self.analysis.keys()):
                del self.analysis[i]
            self._generate_and_store(*args, **kwargs)
        else:
            if self.analysis.keys():
                self.logger.info("Analysis already exists. Reading from: "
                                 "{0}".format(self.analysis.name))
            else:
                # If it doesn't exist then generate a new analysis.
                self._generate_and_store(*args, **kwargs)

    def _generate_and_store(self, *args, **kwargs):
        """Run the analysis function and save its formatted output (datasets
        and attributes) to the HDF5 file. Factored out of create_analysis,
        which previously duplicated this code in both branches."""
        data_dict, attrs_dict = self.hdf5_dataset_formatter(*args, **kwargs)
        # .items() works under both Python 2 and 3 (original used iteritems).
        for key, value in data_dict.items():
            self.analysis.create_dataset(key, data=value, chunks=True)
        for key, value in attrs_dict.items():
            self.analysis.attrs[key] = value

    def get_analysis_grains(self, start, end):
        """
        Retrieve analysis frames for the period specified by start and end
        times (in milliseconds). Arrays of start and end time pairs will
        produce an array of equivalent size containing frames for these times.

        Returns (frames, selection): all stored frames plus a boolean
        selection mask of the frames whose analysis times fall in the range.
        """
        times = self.analysis_group[self.name]["times"][:]
        # Stored times are in seconds; convert ms -> s with float division
        # (the original `/ 1000` floor-divides integer input under Python 2,
        # since this module does not import division from __future__).
        start = start / 1000.
        end = end / 1000.
        vtimes = times.reshape(-1, 1)

        selection = np.transpose((vtimes >= start) & (vtimes <= end))
        # If there are no frames for this grain, take the two closest frames
        # from the adjacent grains.
        if not selection.any():
            frame_center = start + (end-start)/2.
            closest_frames = np.abs(vtimes-frame_center).argsort()[:2]
            selection[closest_frames] = True

        frames = self.analysis_group[self.name]["frames"][:]

        return (frames, selection)

    def hdf5_dataset_formatter(self, analysis_method, *args, **kwargs):
        '''
        Note: This is a generic formatter designed as a template to be
        overwritten by a descriptor sub-class.

        Formats the output from the analysis method to save to the HDF5 file.

        Places data and attributes in 2 dictionaries to be stored in the HDF5
        file.

        NOTE(review): the original signature omitted `self`, so a bound call
        bound the instance to `analysis_method` and always failed; in practice
        sub-classes override this method.
        '''
        output, attributes = analysis_method(*args, **kwargs)
        return ({'data': output}, {'attrs': attributes})

    ############################################################################
    # Formatting functions
    ############################################################################

    def log2_median(self, x):
        """Median of x mapped through a 1000-scaled log2 curve."""
        return np.median(1000 * np.log2(1+x/1000))

    def log2_mean(self, x):
        """Mean of x mapped through a 1000-scaled log2 curve."""
        return np.mean(1000 * np.log2(1+x/1000))

    def formatter_func(self, selection, frames, valid_inds, formatter=None):
        """Apply `formatter` to the valid frames selected for one grain."""
        # Get all valid frames from the current grain.
        frames = frames[selection & valid_inds]
        # NOTE(review): the original had a half-valid-frames -> NaN check
        # after this return, which was unreachable; the dead code has been
        # removed without changing behavior.
        return formatter(frames)

    def analysis_formatter(self, frames, selection, format):
        """Calculate the average analysis value of the grain using the match
        format specified ('mean', 'median', 'log2_mean' or 'log2_median')."""
        valid_inds = np.isfinite(frames)

        format_style_dict = {
            'mean': np.mean,
            'median': np.median,
            'log2_mean': self.log2_mean,
            'log2_median': self.log2_median,
        }

        if not selection.size:
            # TODO: Add warning here
            return np.nan

        # One row of `selection` per grain; reduce each row's frames with the
        # chosen formatter. (An unused np.empty pre-allocation was removed.)
        return np.apply_along_axis(
            self.formatter_func, 1, selection, frames, valid_inds,
            formatter=format_style_dict[format])
logger = logging.getLogger(__name__)
# NOTE(review): the original bound `logger` to the return value of
# addHandler(), which is None; attach the NullHandler separately so the
# module-level logger remains usable.
logger.addHandler(logging.NullHandler())


class AttackAnalysis:

    """Encapsulation of attack estimation analysis."""

    def __init__(self, AnalysedAudioFile, atkpath, config=None):
        """
        AnalysedAudioFile: the audio file object to estimate the attack of.
        atkpath: path of the ".lab" file attack times are stored in. If empty,
            a path is derived from the audio file's database directory.
        config: optional configuration object (unused here).

        Raises IOError when no path is supplied and the file is not part of a
        database.
        """
        self.logger = logging.getLogger(__name__ + '.AttackAnalysis')
        self.AnalysedAudioFile = AnalysedAudioFile
        self.attackpath = atkpath
        self.attack_start = None
        self.attack_end = None
        self.attack_size = None
        self.logattacktime = None
        # Check if analysis file already exists.
        # TODO: check if RMS has changed, if it has then new values will need
        # to be generated even if a file already exists.
        if not self.attackpath:
            if not self.AnalysedAudioFile.db_dir:
                raise IOError("Analysed Audio object must have an atk file"
                              " path or be part of a database")
            self.attackpath = os.path.join(
                self.AnalysedAudioFile.db_dir,
                "atk",
                self.AnalysedAudioFile.name +
                ".lab")
        if self.AnalysedAudioFile.force_analysis:
            pathops.delete_if_exists(self.attackpath)
            # NOTE(review): the original assigned create_attack_analysis()'s
            # (None) return value back to self.attackpath, clobbering the
            # path just computed; run the analysis without reassigning.
            self.create_attack_analysis()
        else:
            try:
                # If the file exists then get values from it.
                self.get_attack_from_file()
            except IOError:
                # Otherwise, generate new values
                self.create_attack_analysis()

    def create_attack_analysis(self, multiplier=3):
        """
        Estimate the start and end of the attack of the audio.

        Adaptive threshold method (weakest effort method) described here:
        http://recherche.ircam.fr/anasyn/peeters/ARTICLES/Peeters_2003_cuidadoaudiofeatures.pdf
        Stores values in a file at the attack path provided with the following
        format:
        attack_start attack_end

        Raises IOError if the required RMS analysis is missing and ValueError
        if the estimation fails.
        """
        # NOTE(review): a stray pdb.set_trace() debugging breakpoint was
        # removed here - it halted every analysis run.
        # Make sure RMS has been calculated
        if not self.AnalysedAudioFile.RMS:
            raise IOError("RMS analysis is required to estimate attack")
        with open(self.attackpath, 'w') as attackfile:
            # Lazy %-formatting: the original passed the path as a bare extra
            # argument with no placeholder, so it was never rendered.
            self.logger.info("Creating attack estimation file:\t\t%s",
                             os.path.relpath(self.attackpath))
            rms_contour = self.AnalysedAudioFile.RMS.get_rms_from_file()
            # Scale RMS contour to range so all calculations are performed in
            # the range 0.0 to 1.0
            # TODO: Should calculations be done in range of rms rather than
            # converting for performance increase?
            rms_contour = self.scale_to_range(rms_contour)
            # Create a grid of thresholds ranging from 0.1 to 1.0
            thresholds = np.arange(1, 11) * 0.1
            thresholds = thresholds.reshape(-1, 1)
            # Find first index of rms that is over the threshold for each
            # threshold
            threshold_inds = np.argmax(rms_contour >= thresholds, axis=1)

            # TODO: Need to make sure rms does not return to a lower threshold
            # after being > a threshold.

            # Calculate the time difference between each of the indexes
            ind_diffs = np.ediff1d(threshold_inds)
            # Find the average time between thresholds
            mean_ind_diff = np.mean(ind_diffs)
            try:
                # Attack start: first threshold whose distance to the next
                # crossing falls below the average time * the multiplier.
                a = np.argmax(ind_diffs < (mean_ind_diff * multiplier))
                attack_start_ind = threshold_inds[a]
                # Attack end: same method, looking above the average time *
                # the multiplier.
                best_end_thresh = ind_diffs > (mean_ind_diff * multiplier)
                # NOTE(review): `if not best_end_thresh:` on a numpy array of
                # more than one element raises ValueError; .any() implements
                # the intended "no candidate crossing" test.
                if not best_end_thresh.any():
                    attack_end_ind = threshold_inds[-1]
                else:
                    attack_end_ind = threshold_inds[np.argmax(best_end_thresh)]
            except ValueError as err:
                raise ValueError("Attack estimation failed: {0}".format(err))
            self.logger.info("START: {0}\nEND: {1}".format(attack_start_ind, attack_end_ind))
            # TODO: Refine position by searching for local min and max of these
            # values
            self.attack_start = self.AnalysedAudioFile.samps_to_secs(
                attack_start_ind)
            self.attack_end = self.AnalysedAudioFile.samps_to_secs(
                attack_end_ind)
            # Values are stored in the file with the following format:
            # attack_start attack_end
            attackfile.write("{0} {1}\n".format(self.attack_start,
                                                self.attack_end))

    def calc_log_attack_time(self):
        """
        Calculate the logarithm of the time duration between the time the
        signal starts to the time that the signal reaches it's stable part
        Described here:
        http://recherche.ircam.fr/anasyn/peeters/ARTICLES/Peeters_2003_cuidadoaudiofeatures.pdf

        Raises ValueError if attack times have not been calculated yet.
        """
        if not self.attack_start or not self.attack_end:
            # NOTE(review): the original message concatenated "calling"+"the"
            # without a space.
            raise ValueError("Attack times must be calculated before calling "
                             "the log attack time method")
        # NOTE(review): the original referenced self.attackend/self.attackstart,
        # attributes that are never defined (AttributeError at runtime).
        self.logattacktime = math.log10(self.attack_end - self.attack_start)

    def get_attack_from_file(self):
        """Read the attack values from a previously generated file."""
        self.logger.info("Reading attack estimation file:\t\t%s",
                         os.path.relpath(self.attackpath))
        with open(self.attackpath, 'r') as attackfile:
            for line in attackfile:
                # Split the values and convert to their correct types
                starttime, endtime = line.split()
                self.attack_start = float(starttime)
                self.attack_end = float(endtime)
                self.attack_size = self.attack_end - self.attack_start

    @staticmethod
    def scale_to_range(array, high=1.0, low=0.0):
        """Linearly rescale array so its minimum maps to low and its maximum
        maps to high."""
        mins = np.min(array)
        maxs = np.max(array)
        rng = maxs - mins
        return high - (((high - low) * (maxs - array)) / rng)
graphviz:: 34 | 35 | digraph b { 36 | subgraph cluster0 { 37 | style=filled; 38 | color=lightgrey; 39 | node [shape=record,width=.1,height=.1]; 40 | node0 [label = " | | | | | | ",width=2.5] 41 | label = "Audio\nFiles"; 42 | labeljust="l"; 43 | } 44 | 45 | subgraph cluster2 { 46 | style=filled; 47 | color=lightgrey; 48 | node [shape=record,width=.1,height=.1]; 49 | node2 [label = "RMS | F0 | Centroid | Kurtosis | etc..."] 50 | label = "Analyses"; 51 | labeljust="l"; 52 | } 53 | database[shape=rectangle, label="Audio Directory"]; 54 | HDF[shape=rectangle, label="HDF5 File"]; 55 | database -> node0; 56 | node0 -> node2; 57 | node2 -> HDF 58 | } 59 | 60 | .. raw:: latex 61 | 62 | \newpage 63 | 64 | Matching 65 | -------- 66 | 67 | Both the source and target HDF5 files are loaded to compare the values of their 68 | analyses. Each audio file's analyses are split into equally sized overlapping 69 | grains and averaged in the appropriate way to be compared to grains from the 70 | other database. 71 | The matching algorithm then calculates the grains that have the smallest 72 | overall difference, based on user defined weightings for each of the analysis 73 | types. This weighting of analyses allows for certain analyses to gain 74 | precedence over others based on user preference. 75 | The best match indexes are then saved to the output database ready for 76 | synthesis. 77 | 78 | There are currently two implementations for the matching algorithm: 79 | 80 | - Brute Force 81 | 82 | - K-d Tree Search 83 | 84 | Both will return similar results, however the K-d tree search algorithm is 85 | far more efficient when analysing large datasets so is the preferred method. 86 | 87 | .. 
graphviz:: 88 | 89 | digraph b { 90 | subgraph cluster0 { 91 | style=filled; 92 | color=lightgrey; 93 | node [shape=record,width=.1,height=.1]; 94 | node0 [label = " | | Source | Audio | Analysis | | ",width=2.5] 95 | 96 | labeljust="l"; 97 | } 98 | 99 | subgraph cluster1 { 100 | style=filled; 101 | color=lightgrey; 102 | node [shape=record,width=.1,height=.1]; 103 | node1 [label = " | | | | | | | | | Source | Analysis | Grains | | | | | | | | | ",width=2.5] 104 | label="\n\n\n\n"; 105 | labeljust="l"; 106 | } 107 | 108 | subgraph cluster2 { 109 | style=filled; 110 | color=lightgrey; 111 | node [shape=record,width=.1,height=.1]; 112 | node2 [label = "Target Audio Analysis"] 113 | labeljust="l"; 114 | } 115 | subgraph cluster3 { 116 | style=filled; 117 | color=lightgrey; 118 | node [shape=record,width=.1,height=.1]; 119 | node3 [label = " | | Target | Analysis | Grains | | ",width=2.5] 120 | label="\n\n\n\n"; 121 | labeljust="l"; 122 | } 123 | database1[shape=rectangle, label="Source HDF5 File"]; 124 | database2[shape=rectangle, label="Target HDF5 File"]; 125 | database3[shape=rectangle, label="Output HDF5 File"]; 126 | matcher[shape=rectangle, label="Matching Algorithm"]; 127 | 128 | node0:f0 -> node1:f0 129 | node0:f0 -> node1:f1 130 | node0:f0 -> node1:f2 131 | node0:f1 -> node1:f3 132 | node0:f1 -> node1:f4 133 | node0:f1 -> node1:f5 134 | node0:f2 -> node1:f6 135 | node0:f2 -> node1:f7 136 | node0:f2 -> node1:f8 137 | node0:f3 -> node1:f9 138 | node0:f3 -> node1:f10 139 | node0:f3 -> node1:f11 140 | node0:f4 -> node1:f12 141 | node0:f4 -> node1:f13 142 | node0:f4 -> node1:f14 143 | node0:f5 -> node1:f15 144 | node0:f5 -> node1:f16 145 | node0:f5 -> node1:f17 146 | node0:f6 -> node1:f18 147 | node0:f6 -> node1:f19 148 | node0:f6 -> node1:f20 149 | node2:f0 -> node3:f0 150 | node2:f0 -> node3:f1 151 | node2:f0 -> node3:f2 152 | node2:f0 -> node3:f3 153 | node2:f0 -> node3:f4 154 | node2:f0 -> node3:f5 155 | node2:f0 -> node3:f6 156 | database1 -> node0; 157 | 
database2 -> node2; 158 | node1 -> matcher 159 | node3 -> matcher 160 | matcher -> database3 161 | 162 | } 163 | 164 | .. raw:: latex 165 | 166 | \newpage 167 | 168 | Synthesis 169 | --------- 170 | 171 | The synthesis process involves loading the best match grains from the source 172 | database, performing any post-processing (such as pitch shifting and amplitude 173 | scaling) to improve the similarity of the match, then windowed overlap adding 174 | the grains to create the final output. The post-processing phase involves using 175 | the ratio difference between the source and target grain to artificially alter 176 | the source grain so that it better resembles the target. This is particularly 177 | useful when using small source databases as it improves the similarity of any 178 | match (important when best matches aren't very close to the target.) The final 179 | output is saved to the output database's audio directory. 180 | 181 | .. graphviz:: 182 | 183 | digraph b { 184 | subgraph cluster3 { 185 | style=filled; 186 | color=lightgrey; 187 | node [shape=record,width=.1,height=.1]; 188 | node3 [label = " | | Matched | Audio | Grains | ",width=2.5] 189 | } 190 | database1[shape=rectangle, label="Source Audio"]; 191 | database3[shape=rectangle, label="Output HDF5 File"]; 192 | synthesizer[shape=rectangle, label="Windowed Overlap/Add"]; 193 | output[shape=rectangle, label="Output Audio File"]; 194 | 195 | database3 -> database1[label="Get match grains"]; 196 | database1 -> node3:f0; 197 | database1 -> node3:f1; 198 | database1 -> node3:f2; 199 | database1 -> node3:f3; 200 | database1 -> node3:f4; 201 | database1 -> node3:f5; 202 | node3:f0 -> synthesizer; 203 | node3:f1 -> synthesizer; 204 | node3:f2 -> synthesizer; 205 | node3:f3 -> synthesizer; 206 | node3:f4 -> synthesizer; 207 | node3:f5 -> synthesizer; 208 | synthesizer -> output; 209 | 210 | } 211 | -------------------------------------------------------------------------------- 
/src/sppysound/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. 
The HTML pages are in $(BUILDDIR)/dirhtml." 63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/TheConcatenator.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/TheConcatenator.qhc" 93 | 94 | applehelp: 95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 96 | @echo 97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 98 | @echo "N.B. You won't be able to view it unless you put it in" \ 99 | "~/Library/Documentation/Help or install it in your application" \ 100 | "bundle." 101 | 102 | devhelp: 103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 104 | @echo 105 | @echo "Build finished." 106 | @echo "To view the help file:" 107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/TheConcatenator" 108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/TheConcatenator" 109 | @echo "# devhelp" 110 | 111 | epub: 112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 113 | @echo 114 | @echo "Build finished. 
The epub file is in $(BUILDDIR)/epub." 115 | 116 | latex: 117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 118 | @echo 119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 121 | "(use \`make latexpdf' here to do that automatically)." 122 | 123 | latexpdf: 124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 125 | @echo "Running LaTeX files through pdflatex..." 126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 128 | 129 | latexpdfja: 130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 131 | @echo "Running LaTeX files through platex and dvipdfmx..." 132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 134 | 135 | text: 136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 137 | @echo 138 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 139 | 140 | man: 141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 142 | @echo 143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 144 | 145 | texinfo: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo 148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 149 | @echo "Run \`make' in that directory to run these through makeinfo" \ 150 | "(use \`make info' here to do that automatically)." 151 | 152 | info: 153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 154 | @echo "Running Texinfo files through makeinfo..." 155 | make -C $(BUILDDIR)/texinfo info 156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 157 | 158 | gettext: 159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 160 | @echo 161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
162 | 163 | changes: 164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 165 | @echo 166 | @echo "The overview file is in $(BUILDDIR)/changes." 167 | 168 | linkcheck: 169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 170 | @echo 171 | @echo "Link check complete; look for any errors in the above output " \ 172 | "or in $(BUILDDIR)/linkcheck/output.txt." 173 | 174 | doctest: 175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 176 | @echo "Testing of doctests in the sources finished, look at the " \ 177 | "results in $(BUILDDIR)/doctest/output.txt." 178 | 179 | coverage: 180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 181 | @echo "Testing of coverage in the sources finished, look at the " \ 182 | "results in $(BUILDDIR)/coverage/python.txt." 183 | 184 | xml: 185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 186 | @echo 187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 188 | 189 | pseudoxml: 190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 191 | @echo 192 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 193 | -------------------------------------------------------------------------------- /src/sppysound/multirate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Module providing Multirate signal processing functionality. 3 | Largely based on MATLAB's Multirate signal processing toolbox with consultation 4 | of Octave m-file source code. 5 | 6 | Ref: https://github.com/mubeta06/python/blob/master/signal_processing/sp/multirate.py 7 | """ 8 | 9 | import sys 10 | import fractions 11 | import numpy 12 | from scipy import signal 13 | 14 | 15 | def downsample(s, n, phase=0): 16 | """Decrease sampling rate by integer factor n with included offset phase. 
17 | """ 18 | return s[phase::n] 19 | 20 | 21 | def upsample(s, n, phase=0): 22 | """Increase sampling rate by integer factor n with included offset phase. 23 | """ 24 | return numpy.roll(numpy.kron(s, numpy.r_[1, numpy.zeros(n-1)]), phase) 25 | 26 | 27 | def decimate(s, r, n=None, fir=False): 28 | """Decimation - decrease sampling rate by r. The decimation process filters 29 | the input data s with an order n lowpass filter and then resamples the 30 | resulting smoothed signal at a lower rate. By default, decimate employs an 31 | eighth-order lowpass Chebyshev Type I filter with a cutoff frequency of 32 | 0.8/r. It filters the input sequence in both the forward and reverse 33 | directions to remove all phase distortion, effectively doubling the filter 34 | order. If 'fir' is set to True decimate uses an order 30 FIR filter (by 35 | default otherwise n), instead of the Chebyshev IIR filter. Here decimate 36 | filters the input sequence in only one direction. This technique conserves 37 | memory and is useful for working with long sequences. 38 | """ 39 | if fir: 40 | if n is None: 41 | n = 30 42 | b = signal.firwin(n, 1.0/r) 43 | a = 1 44 | f = signal.lfilter(b, a, s) 45 | else: #iir 46 | if n is None: 47 | n = 8 48 | b, a = signal.cheby1(n, 0.05, 0.8/r) 49 | f = signal.filtfilt(b, a, s) 50 | return downsample(f, r) 51 | 52 | 53 | def interp(s, r, l=4, alpha=0.5): 54 | """Interpolation - increase sampling rate by integer factor r. Interpolation 55 | increases the original sampling rate for a sequence to a higher rate. interp 56 | performs lowpass interpolation by inserting zeros into the original sequence 57 | and then applying a special lowpass filter. l specifies the filter length 58 | and alpha the cut-off frequency. The length of the FIR lowpass interpolating 59 | filter is 2*l*r+1. The number of original sample values used for 60 | interpolation is 2*l. Ordinarily, l should be less than or equal to 10. 
def resample(s, p, q, h=None):
    """Change sampling rate by rational factor p/q. This implementation is
    based on the Octave implementation of the resample function. It designs
    the anti-aliasing filter using the window approach applying a Kaiser
    window with the beta term calculated as specified by [2].

    s: input signal; p/q: rational rate change; h: optional pre-designed
    anti-aliasing FIR filter (designed internally when None).

    NOTE(review): this is Python-2-era code - fractions.gcd was removed in
    Python 3.9 (math.gcd replaces it), and several slice indices below are
    numpy floats (from numpy.floor/ceil), which modern NumPy/Python 3 reject
    as array indices. Confirm before running on Python 3.

    Ref [1] J. G. Proakis and D. G. Manolakis,
    Digital Signal Processing: Principles, Algorithms, and Applications,
    4th ed., Prentice Hall, 2007. Chap. 6
    Ref [2] A. V. Oppenheim, R. W. Schafer and J. R. Buck,
    Discrete-time signal processing, Signal processing series,
    Prentice-Hall, 1999
    """
    # Reduce p/q to lowest terms so the filter length stays minimal.
    # NOTE(review): integer division intended here (Python 2 semantics).
    gcd = fractions.gcd(p,q)
    if gcd>1:
        p=p/gcd
        q=q/gcd

    if h is None: #design filter
        #properties of the antialiasing filter
        log10_rejection = -3.0
        stopband_cutoff_f = 1.0/(2.0 * max(p,q))
        roll_off_width = stopband_cutoff_f / 10.0

        #determine filter length
        #use empirical formula from [2] Chap 7, Eq. (7.63) p 476
        rejection_db = -20.0*log10_rejection;
        l = numpy.ceil((rejection_db-8.0) / (28.714 * roll_off_width))

        #ideal sinc filter
        t = numpy.arange(-l, l + 1)
        ideal_filter=2*p*stopband_cutoff_f*numpy.sinc(2*stopband_cutoff_f*t)

        #determine parameter of Kaiser window
        #use empirical formula from [2] Chap 7, Eq. (7.62) p 474
        beta = signal.kaiser_beta(rejection_db)

        #apodize ideal filter response
        h = numpy.kaiser(2*l+1, beta)*ideal_filter

    ls = len(s)
    lh = len(h)

    # Group delay of the (odd-length, symmetric) filter.
    l = (lh - 1)/2.0
    # Expected output length.
    ly = numpy.ceil(ls*p/float(q))

    #pre and postpad filter response
    # NOTE(review): nz_pre is a numpy float here; using it as a slice bound
    # relies on legacy NumPy behavior.
    nz_pre = numpy.floor(q - numpy.mod(l,q))
    hpad = h[-lh+nz_pre:]

    # Offset of the first fully-formed output sample.
    offset = numpy.floor((l+nz_pre)/q)
    nz_post = 0;
    # Grow the trailing zero padding until the polyphase output covers ly
    # samples past the offset.
    while numpy.ceil(((ls-1)*p + nz_pre + lh + nz_post )/q ) - offset < ly:
        nz_post += 1
    hpad = hpad[:lh + nz_pre + nz_post]

    #filtering
    xfilt = upfirdn(s, hpad, p, q)

    # Trim the filter transient and return exactly ly samples.
    return xfilt[offset-1:offset-1+ly]
(7.62) p 474 106 | beta = signal.kaiser_beta(rejection_db) 107 | 108 | #apodize ideal filter response 109 | h = numpy.kaiser(2*l+1, beta)*ideal_filter 110 | 111 | ls = len(s) 112 | lh = len(h) 113 | 114 | l = (lh - 1)/2.0 115 | ly = numpy.ceil(ls*p/float(q)) 116 | 117 | #pre and postpad filter response 118 | nz_pre = numpy.floor(q - numpy.mod(l,q)) 119 | hpad = h[-lh+nz_pre:] 120 | 121 | offset = numpy.floor((l+nz_pre)/q) 122 | nz_post = 0; 123 | while numpy.ceil(((ls-1)*p + nz_pre + lh + nz_post )/q ) - offset < ly: 124 | nz_post += 1 125 | hpad = hpad[:lh + nz_pre + nz_post] 126 | 127 | #filtering 128 | xfilt = upfirdn(s, hpad, p, q) 129 | 130 | return xfilt[offset-1:offset-1+ly] 131 | 132 | 133 | def upfirdn(s, h, p, q): 134 | """Upsample signal s by p, apply FIR filter as specified by h, and 135 | downsample by q. Using fftconvolve as opposed to lfilter as it does not seem 136 | to do a full convolution operation (and its much faster than convolve). 137 | """ 138 | return downsample(signal.fftconvolve(h, upsample(s, p)), q) 139 | 140 | def main(): 141 | """Show simple use cases for functionality provided by this module. 
Each 142 | example below attempts to mimic the examples provided by mathworks MATLAB 143 | documentation, http://www.mathworks.com/help/toolbox/signal/ 144 | """ 145 | import pylab 146 | argv = sys.argv 147 | if len(argv) != 1: 148 | print >>sys.stderr, 'usage: python -m pim.sp.multirate' 149 | sys.exit(2) 150 | 151 | #Downsample 152 | x = numpy.arange(1, 11) 153 | print 'Down Sampling %s by 3' % x 154 | print downsample(x, 3) 155 | print 'Down Sampling %s by 3 with phase offset 2' % x 156 | print downsample(x, 3, phase=2) 157 | 158 | #Upsample 159 | x = numpy.arange(1, 5) 160 | print 'Up Sampling %s by 3' % x 161 | print upsample(x, 3) 162 | print 'Up Sampling %s by 3 with phase offset 2' % x 163 | print upsample(x, 3, 2) 164 | 165 | #Decimate 166 | t = numpy.arange(0, 1, 0.00025) 167 | x = numpy.sin(2*numpy.pi*30*t) + numpy.sin(2*numpy.pi*60*t) 168 | y = decimate(x,4) 169 | pylab.figure() 170 | pylab.subplot(2, 1, 1) 171 | pylab.title('Original Signal') 172 | pylab.stem(numpy.arange(len(x[0:120])), x[0:120]) 173 | pylab.subplot(2, 1, 2) 174 | pylab.title('Decimated Signal') 175 | pylab.stem(numpy.arange(len(y[0:30])), y[0:30]) 176 | 177 | #Interp 178 | t = numpy.arange(0, 1, 0.001) 179 | x = numpy.sin(2*numpy.pi*30*t) + numpy.sin(2*numpy.pi*60*t) 180 | y = interp(x,4) 181 | pylab.figure() 182 | pylab.subplot(2, 1, 1) 183 | pylab.title('Original Signal') 184 | pylab.stem(numpy.arange(len(x[0:30])), x[0:30]) 185 | pylab.subplot(2, 1, 2) 186 | pylab.title('Interpolated Signal') 187 | pylab.stem(numpy.arange(len(y[0:120])), y[0:120]) 188 | 189 | #upfirdn 190 | L = 147.0 191 | M = 160.0 192 | N = 24.0*L 193 | h = signal.firwin(N-1, 1/M, window=('kaiser', 7.8562)) 194 | h = L*h 195 | Fs = 48000.0 196 | n = numpy.arange(0, 10239) 197 | x = numpy.sin(2*numpy.pi*1000/Fs*n) 198 | y = upfirdn(x, h, L, M) 199 | pylab.figure() 200 | pylab.stem(n[1:49]/Fs, x[1:49]) 201 | pylab.stem(n[1:45]/(Fs*L/M), y[13:57], 'r', markerfmt='ro',) 202 | pylab.xlabel('Time (sec)') 203 | 
pylab.ylabel('Signal value') 204 | 205 | #resample 206 | fs1 = 10.0 207 | t1 = numpy.arange(0, 1 + 1.0/fs1, 1.0/fs1) 208 | x = t1 209 | y = resample(x, 3, 2) 210 | t2 = numpy.arange(0,(len(y)))*2.0/(3.0*fs1) 211 | pylab.figure() 212 | pylab.plot(t1, x, '*') 213 | pylab.plot(t2, y, 'o') 214 | pylab.plot(numpy.arange(-0.5,1.5, 0.01), numpy.arange(-0.5,1.5, 0.01), ':') 215 | pylab.legend(('original','resampled')) 216 | pylab.xlabel('Time') 217 | 218 | x = numpy.hstack([numpy.arange(1,11), numpy.arange(9,0,-1)]) 219 | y = resample(x,3,2) 220 | pylab.figure() 221 | pylab.subplot(2, 1, 1) 222 | pylab.title('Edge Effects Not Noticeable') 223 | pylab.plot(numpy.arange(19)+1, x, '*') 224 | pylab.plot(numpy.arange(29)*2/3.0 + 1, y, 'o') 225 | pylab.legend(('original', 'resampled')) 226 | x = numpy.hstack([numpy.arange(10, 0, -1), numpy.arange(2,11)]) 227 | y = resample(x,3,2) 228 | pylab.subplot(2, 1, 2) 229 | pylab.plot(numpy.arange(19)+1, x, '*') 230 | pylab.plot(numpy.arange(29)*2/3.0 + 1, y, 'o') 231 | pylab.title('Edge Effects Very Noticeable') 232 | pylab.legend(('original', 'resampled')) 233 | 234 | pylab.show() 235 | return 0 236 | 237 | if __name__ == '__main__': 238 | sys.exit(main()) 239 | -------------------------------------------------------------------------------- /src/sppysound/analysis/FFTAnalysis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for creating an FFT analysis of audio. 
3 | 4 | Ref: Code adapted from: 5 | http://www.frank-zalkow.de/en/code-snippets/create-audio-spectrograms-with-python.html?ckattempt=1 6 | """ 7 | from __future__ import print_function, division 8 | import logging 9 | from fileops import pathops 10 | import numpy as np 11 | from numpy.lib import stride_tricks 12 | import os 13 | from AnalysisTools import ButterFilter 14 | from Analysis import Analysis 15 | import pdb 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | class FFTAnalysis(Analysis): 21 | """ 22 | FFT analysis descriptor class for generation of FFT spectral analysis. 23 | 24 | This descriptor calculates the spectral content for overlapping grains 25 | of an AnalysedAudioFile object. A full definition of FFT analysis can be 26 | found in the documentation. 27 | 28 | Arguments: 29 | 30 | - analysis_group: the HDF5 file group to use for the storage of the 31 | analysis. 32 | 33 | - config: The configuration module used to configure the analysis 34 | """ 35 | 36 | def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None): 37 | super(FFTAnalysis, self).__init__(AnalysedAudioFile, frames, analysis_group, 'FFT') 38 | self.logger = logging.getLogger(__name__+'.{0}Analysis'.format(self.name)) 39 | # Store reference to the file to be analysed 40 | self.AnalysedAudioFile = AnalysedAudioFile 41 | 42 | if config: 43 | window_size = config.fft["window_size"] 44 | else: 45 | window_size = 2048 46 | self.analysis_group = analysis_group 47 | self.logger.info("Creating FFT analysis for {0}".format(self.AnalysedAudioFile.name)) 48 | self.create_analysis(frames, window_size=window_size) 49 | self.fft_window_count = None 50 | 51 | 52 | 53 | def create_fft_analysis(self, frames, window_size=512, window_overlap=2, 54 | window_type='hanning'): 55 | """Create a spectral analysis for overlapping frames of audio.""" 56 | if hasattr(frames, '__call__'): 57 | frames = frames() 58 | # Calculate the period of the window in hz 59 | lowest_freq = 1.0 / 
window_size 60 | # Filter frequencies lower than the period of the window 61 | # filter = ButterFilter() 62 | # filter.design_butter(lowest_freq, self.AnalysedAudioFile.samplerate) 63 | 64 | # frames = filter.filter_butter(frames) 65 | stft = self.stft(frames, window_size, overlapFac=1/window_overlap) 66 | frame_times = self.calc_fft_frame_times( 67 | stft, 68 | frames, 69 | self.AnalysedAudioFile.samplerate 70 | ) 71 | return (stft, frame_times) 72 | 73 | def get_analysis_grains(self, start, end): 74 | """ 75 | Retrieve analysis frames for period specified in start and end times. 76 | arrays of start and end time pairs will produce an array of equivelant 77 | size containing frames for these times. 78 | """ 79 | times = self.analysis_group["FFT"]["times"][:] 80 | start = start / 1000 81 | end = end / 1000 82 | vtimes = times.reshape(-1, 1) 83 | 84 | selection = np.transpose((vtimes >= start) & (vtimes <= end)) 85 | 86 | np.set_printoptions(threshold=np.nan) 87 | 88 | grain_data = [] 89 | for grain in selection: 90 | grain_data.append((self.analysis_group["FFT"]["frames"][grain, :], times[grain])) 91 | 92 | return grain_data 93 | 94 | def hdf5_dataset_formatter(self, *args, **kwargs): 95 | ''' 96 | Formats the output from the analysis method to save to the HDF5 file. 97 | 98 | Places data and attributes in 2 dictionaries to be stored in the HDF5 99 | file. 
100 | ''' 101 | frames, frame_times = self.create_fft_analysis(*args, **kwargs) 102 | return ( 103 | { 104 | 'frames': frames, 105 | 'times': frame_times 106 | }, 107 | { 108 | 'win_size': kwargs.pop('window_size', 512), 109 | 'overlap': kwargs.pop('overlap', 2), 110 | 'window_type': kwargs.pop('window_type', 'hanning') 111 | } 112 | ) 113 | 114 | @staticmethod 115 | def stft(sig, frameSize, overlapFac=0.5, window=np.hanning): 116 | """Short time fourier transform of audio signal.""" 117 | win = window(frameSize) 118 | hopSize = int(frameSize - np.floor(overlapFac * frameSize)) 119 | 120 | # zeros at beginning (thus center of 1st window should be for sample nr. 0) 121 | samples = np.append(np.zeros(np.floor(frameSize/2).astype(int)), sig) 122 | # cols for windowing 123 | 124 | cols = np.ceil((len(samples) - frameSize) / float(hopSize)) + 1 125 | # zeros at end (thus samples can be fully covered by frames) 126 | samples = np.append(samples, np.zeros(frameSize)) 127 | 128 | frames = stride_tricks.as_strided( 129 | samples, 130 | shape=(cols, frameSize), 131 | strides=(samples.strides[0]*hopSize, samples.strides[0]) 132 | ).copy() 133 | 134 | frames *= win 135 | 136 | return np.fft.rfft(frames) 137 | 138 | ''' 139 | def logscale_spec(self, spec, sr=44100, factor=20.): 140 | """Scale frequency axis logarithmically.""" 141 | # Get a count of times and frequencies from fft frames 142 | timebins, freqbins = np.shape(spec) 143 | 144 | # Create array from 0 to 1 with values for each frequency bin. 145 | # Scale by a power of the factor provided. 146 | scale = np.linspace(0, 1, freqbins) ** factor 147 | # Scale to the number of frequency bins 148 | scale *= (freqbins-1)/max(scale) 149 | # Round to the nearest whole number and reduce to only unique numbers. 
150 | scale = np.unique(np.round(scale)) 151 | 152 | # Create a new complex number array with the number of time frames and 153 | # the new number of frequency bins 154 | newspec = np.complex128(np.zeros([timebins, len(scale)])) 155 | # For each of the frequency bins 156 | for i in range(0, len(scale)): 157 | # If it is the highest frequency bin... 158 | if i == len(scale)-1: 159 | # Sum all frequency bins from the scale index upwards 160 | newspec[:, i] = np.sum(spec[:, scale[i]:], axis=1) 161 | else: 162 | # Sum all frequency bins from the current scale index up to the 163 | # next scale index 164 | newspec[:, i] = np.sum(spec[:, scale[i]:scale[i+1]], axis=1) 165 | 166 | # List the center frequency of bins 167 | allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1]) 168 | freqs = [] 169 | # For each of the frequency bins 170 | for i in range(0, len(scale)): 171 | if i == len(scale)-1: 172 | freqs += [np.mean(allfreqs[scale[i]:])] 173 | else: 174 | freqs += [np.mean(allfreqs[scale[i]:scale[i+1]])] 175 | 176 | return newspec, freqs 177 | 178 | def plotstft(self, samples, fs, binsize=2**10, plotpath=None, 179 | colormap="jet"): 180 | """Plot spectrogram.""" 181 | # Get all fft frames 182 | s = self.analysis['data'][:] 183 | 184 | sshow, freq = self.logscale_spec(s, factor=1.0, sr=fs) 185 | 186 | # Amplitude to decibel 187 | ims = 20.*np.log10(np.abs(sshow)/10e-6) 188 | 189 | # Get the dimensions of the fft 190 | timebins, freqbins = np.shape(ims) 191 | 192 | plt.figure(figsize=(15, 7.5)) 193 | plt.imshow(np.transpose(ims), origin="lower", aspect="auto", 194 | cmap=colormap) 195 | # Add a colour bar to the side of the spectrogram. 
196 | plt.colorbar() 197 | 198 | # Set spectrogram labels 199 | plt.xlabel("time (s)") 200 | plt.ylabel("frequency (hz)") 201 | plt.xlim([0, timebins-1]) 202 | plt.ylim([0, freqbins]) 203 | 204 | # Create an array of 5 values from 0 to the number of times 205 | xlocs = np.float32(np.linspace(0, timebins-1, 5)) 206 | # Display time values at 5 points along the x axis of the graph 207 | plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs*len(samples)/timebins)+(0.5*binsize))/fs]) 208 | # Display frequency values at 10 points along the y axis of the graph 209 | ylocs = np.int16(np.round(np.linspace(0, freqbins-1, 10))) 210 | plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs]) 211 | 212 | if plotpath: 213 | plt.savefig(plotpath, bbox_inches="tight") 214 | else: 215 | plt.show() 216 | 217 | plt.clf() 218 | ''' 219 | 220 | def calc_fft_frame_times(self, fftframes, sample_frames, samplerate): 221 | """Calculate times for frames using sample size and samplerate.""" 222 | 223 | if hasattr(sample_frames, '__call__'): 224 | sample_frames = sample_frames() 225 | # Get number of frames for time and frequency 226 | timebins, freqbins = np.shape(fftframes) 227 | # Create array ranging from 0 to number of time frames 228 | scale = np.arange(timebins+1) 229 | # divide the number of samples by the total number of frames, then 230 | # multiply by the frame numbers. 231 | fft_times = (sample_frames.shape[0]/timebins) * scale[:-1] 232 | # Divide by the samplerate to give times in seconds 233 | fft_times = fft_times / samplerate 234 | return fft_times 235 | 236 | 237 | --------------------------------------------------------------------------------