├── 909_test └── original_filenames.txt ├── all_2frames └── dendrogram.png ├── figures ├── 909_clustered.pdf ├── clustermap_909.png ├── minipops_output.png └── umap_909.png ├── get_clusters.py ├── minipops_2frames └── original_filenames.txt ├── readme.md ├── visualize_clusters_and_umap.R └── wav_clustering_workflow.py /909_test/original_filenames.txt: -------------------------------------------------------------------------------- 1 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/hh01.wav 2 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/lt03.wav 3 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/lt02.wav 4 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/hh02.wav 5 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/lt01.wav 6 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/hh03.wav 7 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/lt05.wav 8 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/lt04.wav 9 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/rs02.wav 10 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/hh04.wav 11 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/lt06.wav 12 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/lt07.wav 13 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/rs01.wav 14 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/mt08.wav 15 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/ht04.wav 16 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd03.wav 17 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd02.wav 18 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/ht05.wav 19 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/ht07.wav 20 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd14.wav 21 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd01.wav 22 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd15.wav 23 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/ht06.wav 24 | 
/Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/bd08.wav 25 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd11.wav 26 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd05.wav 27 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/ht02.wav 28 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/ht03.wav 29 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd04.wav 30 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd10.wav 31 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/bd09.wav 32 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd06.wav 33 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd12.wav 34 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/ht01.wav 35 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd13.wav 36 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd07.wav 37 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/mt01.wav 38 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/bd07.wav 39 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/cr02.wav 40 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/cr03.wav 41 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/bd06.wav 42 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/cp01.wav 43 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/mt02.wav 44 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/cp03.wav 45 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/bd04.wav 46 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/bd10.wav 47 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd09.wav 48 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/cr01.wav 49 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/sd08.wav 50 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/bd05.wav 51 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/cp02.wav 52 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/mt03.wav 53 | 
/Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/mt07.wav 54 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/bd01.wav 55 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/cr04.wav 56 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/mt06.wav 57 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/mt04.wav 58 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/bd02.wav 59 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/ht08.wav 60 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/cp04.wav 61 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/bd03.wav 62 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/mt05.wav 63 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/rd01.wav 64 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/oh01.wav 65 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/rd02.wav 66 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/oh02.wav 67 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/lt08.wav 68 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/oh03.wav 69 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/rd03.wav 70 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/rd04.wav 71 | /Users/mclaurt/Music/dahnloads/BPB Cassette 909/clean/oh04.wav 72 | -------------------------------------------------------------------------------- /all_2frames/dendrogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TylerMclaughlin/wav_clustering_workflow/374c95e74b3f9adc6e2a1de49635946c575ae8e2/all_2frames/dendrogram.png -------------------------------------------------------------------------------- /figures/909_clustered.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TylerMclaughlin/wav_clustering_workflow/374c95e74b3f9adc6e2a1de49635946c575ae8e2/figures/909_clustered.pdf 
"""
Splits the similarity-sorted output directory of a clustering run into one sub-directory per flat cluster.
"""

import os
import glob
import pickle
import numpy as np
from shutil import copy
from scipy.cluster.hierarchy import ward, leaves_list, fcluster


def load_linkage(data_dir):
    """
    Load the linkage matrix pickled as 'ward_linkage.pkl' inside data_dir.

    NOTE: unpickling can execute arbitrary code. Only use this on output directories you produced yourself.
    """
    with open(os.path.join(data_dir, 'ward_linkage.pkl'), 'rb') as f:
        return pickle.load(f)


def save_clusters_into_dirs(data_dir, Z, cluster_membership):
    """
    Copy every audio file in data_dir into data_dir/clusters/cluster_XXXX, where XXXX is its zero-padded cluster id.

    data_dir: directory holding the .wav / .aif files. Sorted by file name, they must be in
        dendrogram leaf order (this is how the numbered copies in the output directory are named).
    Z: linkage matrix of the clustering run.
    cluster_membership: cluster id of each original observation, e.g. the result of fcluster(Z, ...).

    Raises ValueError if the number of audio files differs from the number of leaves, because the
    file-to-cluster mapping would then be wrong.
    """
    ll = leaves_list(Z)
    # Escape the directory so that glob metacharacters in its name (e.g. '[KB6]') are matched literally.
    escaped_dir = glob.escape(data_dir)
    all_output_wavs = sorted(glob.glob(os.path.join(escaped_dir, '*.wav')) + glob.glob(os.path.join(escaped_dir, '*.aif')))
    # Reorder the memberships from observation order into leaf order, i.e. the order of the sorted files.
    new_clusters = np.asarray(cluster_membership)[ll]
    if len(all_output_wavs) != len(new_clusters):
        raise ValueError(
            f'{len(all_output_wavs)} audio files found in {data_dir}, '
            f'but the linkage describes {len(new_clusters)} samples.'
        )
    for wav, c in zip(all_output_wavs, new_clusters):
        cluster_dir = os.path.join(data_dir, 'clusters', 'cluster_' + str(c).zfill(4))
        os.makedirs(cluster_dir, exist_ok=True)
        copy(wav, cluster_dir)


def save_cluster_folders(data_dir, n_clusters):
    """
    Cut the stored dendrogram of data_dir into at most n_clusters flat clusters and copy the samples into per-cluster folders.
    """
    # to do: implement prompt if folder/file exists
    Z = load_linkage(data_dir)
    cm = fcluster(Z, n_clusters, criterion='maxclust')
    save_clusters_into_dirs(data_dir, Z, cm)


def test_korg():
    """
    Smoke run on the local 'korg_2frames' output directory, split into 5 clusters.

    Test runners collect this function because of its name, so it only prints a message
    when the linkage file of that directory is not present on the current machine.
    """
    data_dir = 'korg_2frames'
    if not os.path.isfile(os.path.join(data_dir, 'ward_linkage.pkl')):
        print(f'{data_dir}/ward_linkage.pkl not found; skipping.')
        return
    save_cluster_folders(data_dir, 5)
4 | 5 | Presented as a workflow in `Python`, this project applies hierarchical clustering to 68 acoustic features derived from each analyzed frame, for the purpose of grouping and ordering audio samples by similarity. `.wav` files from many separate directories can be used as input. In a new directory, renamed copies of all the input `.wav` files are saved; these copies are named such that their alphabetical order corresponds to acoustic similarity as determined by clustering. In other words, adjacent samples in the new directory, e.g. `00012.wav` and `00013.wav`, will sound similar, whereas `00012.wav` and `00082.wav` will sound less similar. Finally, it saves a dendrogram for visualization of the clusters. 6 | 7 | The exploratory `R` code can be used to visualize a clustered feature heatmap and a UMAP dimensionality reduction. 8 | 9 | The reliability of the algorithm was assessed on 71 Roland 909 drum machine samples from the free [BPB Cassette 909 sample pack](https://bedroomproducersblog.com/2014/04/24/free-909-samples/). 10 | The algorithm perfectly separates 909 cymbals (hi-hats, ride cymbal, crash cymbal) from 909 membranophones (kick drums, toms, snare drums) and further correctly separates and groups drums at a finer level (e.g., all snare drums are grouped together). Interesting groupings arise, such as the 909 clap being grouped with crash and ride cymbals rather than the snare drum. 11 | 12 | Acoustic features are extracted from the first two non-overlapping 50 ms frames of the audio. 68 features per frame per sample are calculated using the `pyAudioAnalysis` library. Features include MFCCs, chroma, energy, spectral entropy, and more. 13 | 14 | To benchmark speed and scalability, hierarchical clustering was also run on 18 thousand drum machine samples (~5 GB) from [kb6](https://samples.kb6.de/downloads.php), taking less than 1 hour on a 2019 MacBook Pro. 
15 | 16 | Finally, the structure of the sorted output `.wav` files was designed to be compatible with the [TidalCycles pattern language for live-coding music](https://tidalcycles.org/Welcome). 17 | By reordering `.wav` files such that similar `.wav` files are next to one another, controlled variation can be achieved in TidalCycles by patterning the sample number. 18 | 19 | 20 | ## Requirements 21 | 22 | * `Python 3` 23 | 24 | #### Python modules: 25 | 26 | * `pyAudioAnalysis` 27 | * `pandas` 28 | * `scipy` 29 | * `matplotlib` 30 | 31 | #### Optional R packages: 32 | 33 | These R packages are for exploratory data visualization. The 909 benchmark below uses them to plot the clustered feature heatmap and dimensionality reduction (see script `visualize_clusters_and_umap.R`). `R` and this script are not required to use this tool for its basic purposes: clustering and organizing audio samples. 34 | 35 | 36 | * `data.table` 37 | * `pheatmap` 38 | * `ggplot2` 39 | * `umap` 40 | 41 | 42 | 43 | ## 909 drum machine clustering benchmark results 44 | 45 | This benchmark can be considered a positive control experiment. 46 | 47 | It was initially unclear whether hierarchical clustering could be used to reliably separate drum machine sounds, like bass drums, toms, hi-hats, snares, etc., from one another. This 909 benchmark experiment shows that the method is indeed reliable. 48 | 49 | The **drum categories** (like "bd" for bass drum samples or "hh" for hi-hat samples) as dictated by the filename of the sample were used as "truth labels". Because drum categories were available for these samples (shown in the colored bar above the heatmap), it is possible to do a quick assessment to compare the drum categories to the clusters derived from purely acoustic properties. Hierarchical clustering separates the drum categories surprisingly well. 
50 | 51 | ![heatmap](./figures/clustermap_909.png) 52 | 53 | The following UMAP projection reduces the dimensionality of the feature space from 136 (68 * 2 frames) to 2 dimensions. 54 | ![umap](./figures/umap_909.png) 55 | This UMAP plot is interesting but it is difficult to explain the groupings. Perhaps it reflects similarities and differences in synthesis / sampling methods of the drums of the 909. Clockwise from the left, we have membranophones, snares, and cymbals + clap. 56 | 57 | NB: The `R` script is required to generate plots like these. 58 | 59 | 60 | ## Performance benchmark: 18 thousand drum machine samples 61 | 62 | ![dendrogram](./all_2frames/dendrogram.png) 63 | 64 | 45 thousand `.wav` files from several hundred unique drum machine models were first downloaded from [kb6](https://samples.kb6.de/downloads.php). 65 | 66 | 18 thousand `.wav` files were long enough to extract features on two 50 ms frames. Only these 18 thousand samples were hierarchically clustered; the others were excluded from the benchmark. 67 | 68 | The total size of the input `.wav` files that were clustered was ~5 GB. 69 | Feature extraction and hierarchical clustering completed in less than one hour on a 2019 MacBook Pro. 70 | 71 | 72 | 73 | ## Usage 74 | 75 | The easiest way to use this tool is as a `Python` module, via `import wav_clustering_workflow as wcw` 76 | 77 | Additionally, the `glob` module (installed with Python) offers a convenient method for pattern matching. `glob` supports Unix-style wildcards so it can be used to specify multiple `.wav` files with a single expression. 78 | 79 | ```python 80 | # import this repository 81 | import wav_clustering_workflow as wcw 82 | 83 | # import glob; comes with Python 84 | import glob 85 | 86 | 87 | # set the parent directory for your samples. this part of the path will not go into the name of the .wavs in the dendrogram visualization. 
88 | parent_dir = 'path/to/your/samples/' 89 | 90 | 91 | # make a list of paths to all the .wav files you want to analyze. This is how the input is specified. Note that parent_dir must end with a '/' because it is concatenated directly with the glob pattern. 92 | # use multiple subdirectories of the parent dir if you want! (shown with the first asterisk). 93 | # These subdirectories will be prepended to the name of the wav in the dendrogram. 94 | wavs = glob.glob(parent_dir + 'sample_folder_*/*.wav') 95 | 96 | 97 | # you can also add globbed lists together for more flexibility. 98 | more_wavs = glob.glob(parent_dir + 'other_sample_folder_*/*.wav') 99 | wavs = wavs + more_wavs 100 | 101 | 102 | # Now we are ready to call the main function. 103 | # Specify the number of frames to extract features from. Here we are using 2 frames. 2 seems to be sufficient for drum machine sample analyses. 104 | # You must also specify the name for the output directory. 105 | # The output directory will include sorted copies of the input .wavs, the dendrogram visualization, and a text file showing the original paths of the .wavs 106 | 107 | wcw.cluster_and_save_order(wavs, 2, parent_dir = parent_dir, outdir = 'your_output_directory') 108 | 109 | ``` 110 | 111 | More example functions can be found in the main code. This last example takes all the samples from the Korg Minipops drum machine and saves them in a new directory called 'minipops_2frames'. 112 | 113 | ```python 114 | 115 | parent_dir = '/Users/mclaurt/Music/dahnloads/kb6_drum_samples/ALL_EXTRACTED/' 116 | # glob treats '[' and ']' as special characters, so match the literal brackets in the directory name by writing '[' as '[[]' and ']' as '[]]'. 
117 | minipops_wavs = glob.glob(parent_dir + '[[]KB6[]]_Korg_Minipops/*.wav') 118 | wcw.cluster_and_save_order(minipops_wavs, 2, parent_dir = parent_dir, outdir = 'minipops_2frames') 119 | ``` 120 | This output directory contains the following generated outputs including copied, renamed, and sorted `.wav` files: 121 | ![minipops_output](./figures/minipops_output.png) 122 | 123 | 124 | 125 | ## Features not yet implemented 126 | 127 | * Command-line interface 128 | * Test performance on longer samples, using either different window sizes or the different `pyAudioAnalysis` feature extraction functions for longer timescales. 129 | * Interactive UMAP audio plot, like [this](https://petergill.shinyapps.io/shinyplay/) 130 | -------------------------------------------------------------------------------- /visualize_clusters_and_umap.R: -------------------------------------------------------------------------------- 1 | library(data.table) 2 | library(pheatmap) 3 | 4 | data <- fread('data/drums_and_features.csv') 5 | 6 | dm <- data.matrix(data) 7 | 8 | wavnames <- colnames(dm) 9 | 10 | drum.categories <- unlist(lapply(wavnames, function(x){substr(x,0,2)})) 11 | 12 | d.anno <- data.frame(drum.categories) 13 | row.names(d.anno) <- colnames(dm) 14 | 15 | pheatmap(dm, annotation_col = d.anno) 16 | 17 | my_heatmap 18 | 19 | min(dm + 39, na.rm = TRUE) 20 | 21 | #dmc <- dm[complete.cases(dm),] 22 | 23 | dmcs <- (dm - mean(dm, na.rm = TRUE)) / sd(dm, na.rm = TRUE) 24 | 25 | for(i in 1:ncol(dmcs)){ 26 | dmcs[is.na(dmcs[,i]), i] <- mean(dmcs[,i], na.rm = TRUE) 27 | } 28 | 29 | 30 | pheatmap(log(dmcs + 10)) 31 | 32 | 33 | for(i in 1:ncol(dm)){ 34 | dm[is.na(dm[,i]), i] <- mean(dm[,i], na.rm = TRUE) 35 | } 36 | 37 | dm[is.na(dm)] = 4 38 | 39 | hist(dm) 40 | 41 | dm2 <- dm 42 | 43 | dm2[dm2 < -5] <- 0.0 44 | 45 | hist(scale(dm2), breaks = 100) 46 | 47 | sdm2 <- scale(dm2) 48 | sdm2[sdm2 > 1.5] <- 1.5 49 | sdm2[sdm2 < -1.5] <- -1.5 50 | 51 | pheatmap(sdm2, annotation_col = d.anno) 52 | 
"""
Hierarchically clusters wav samples. Currently tested on drum samples. Saves copies that are renamed -- numbered and ordered by similarity.
"""

import os
import glob
import pandas as pd
from shutil import copy
import pickle

from pyAudioAnalysis import ShortTermFeatures as aF
from pyAudioAnalysis import audioBasicIO as aIO


from scipy.cluster.hierarchy import ward, dendrogram, leaves_list
from scipy.spatial.distance import pdist
from matplotlib import pyplot as plt

# Machine-specific locations of the sample libraries used by the experiment functions in this file.
_KB6_PARENT_DIR = '/Users/mclaurt/Music/dahnloads/kb6_drum_samples/ALL_EXTRACTED/'
_BPB_909_PARENT_DIR = "/Users/mclaurt/Music/dahnloads/BPB Cassette 909/"


def get_short_term_features(wav_loc, win=0.050, step=0.050):
    """
    Extract short-term features from one audio file.

    win and step are the window length and the step between windows, in seconds.
    The defaults give 50 msec non-overlapping windows.

    Returns [f, fn]: f is the feature matrix (one row per feature, one column per frame)
    and fn is the list of feature names. Returns None when the extraction raises a
    ValueError, which happens when the sample is too short.
    """
    # get sampling frequency and signal.
    fs, s = aIO.read_audio_file(wav_loc)
    # convert to mono so all features work!
    s = aIO.stereo_to_mono(s)

    # print duration of wav in seconds:
    duration = len(s) / float(fs)
    print(f'{wav_loc} duration = {duration} seconds')

    # There are 68 feature names: 34 base features ('zcr', 'energy', 'energy_entropy', 'spectral_centroid',
    # 'spectral_spread', 'spectral_entropy', 'spectral_flux', 'spectral_rolloff', 'mfcc_1' ... 'mfcc_13',
    # 'chroma_1' ... 'chroma_12', 'chroma_std') followed by the same 34 names prefixed with 'delta '.
    # features f look like numpy matrices
    try:
        [f, fn] = aF.feature_extraction(s, fs, int(fs * win), int(fs * step))
    # sometimes the feature extraction yields a ValueError because the sample is too short.
    except ValueError:
        return None
    print(f'{f.shape[1]} frames, {f.shape[0]} short-term features')
    return [f, fn]


def flatten_n_frames(f, n):
    """
    Return the first n frames (columns) of the feature matrix f as one flat vector.
    """
    m = f[:, :n]
    # use Fortran (column-major) order so that [[1,2],[3,4],[5,6]] becomes [1,3,5,2,4,6]
    # (i.e., all features of the first frame, then all features of the next frame.)
    return m.flatten('F')


def _dir_wav_name(wav_path, include_parent_dir):
    """Return 'sample.wav', or 'dir_name/sample.wav' when include_parent_dir is set and the path has a directory."""
    name = os.path.basename(wav_path)
    if include_parent_dir:
        parent = os.path.basename(os.path.dirname(wav_path))
        if parent:
            # Always join with '/': save_ordered_wav_copies replaces '/' when it builds the output names.
            name = parent + '/' + name
    return name


def get_features_frame(wav_locs, first_n_frames, include_parent_dir=False):
    """
    Iterates over the list of paths to each wav file of interest. Extracts feature matrix. Subsets the feature matrix to the first_n_frames.

    include_parent_dir should be set to True if the parent directory of the sample contains meaningful information (like the drum machine, for instance).
    If this is the case, the new drum name will be like 'kicks/kd01.wav'. I call this a 'dir_wav'.

    Returns a DataFrame with one column per wav (named by its basename or dir_wav) and one row per flattened feature.
    Wavs that are too short for feature extraction are reported and left out.
    Wavs with fewer than first_n_frames frames get NaN in the rows they cannot fill.
    """
    feature_dict = {}
    for w in wav_locs:
        wav_name = _dir_wav_name(w, include_parent_dir)
        extracted = get_short_term_features(w)
        if extracted is None:
            print(f'{w} appears to be too short to extract features')
            continue
        f, _ = extracted
        if wav_name in feature_dict:
            # Names are the dictionary keys, so a repeated name replaces the earlier entry.
            print(f'warning: {wav_name} occurs more than once in the input; keeping the features of {w}')
        feature_dict[wav_name] = flatten_n_frames(f, first_n_frames)
    return pd.DataFrame.from_dict(feature_dict, orient='index').transpose()


def save_ordered_wav_copies(parent_dir, list_of_dir_wavs, outdir):
    """
    list_of_dir_wavs is a list of wavs in their directories, like ['kicks/kd01.wav','kicks/kd02.wav','snare/sd01.wav','hh/hh01.wav'],
    given relative to parent_dir and already in the order in which the copies should be numbered.
    Saves list of original filenames as a text file.
    Copies the files to a new, sorted location using the function copy() from shutil.
    Symlinking would save space, but since it doesn't work for all applications, I went with copy().
    """
    # make the output directory (including missing parent directories) if it doesn't exist.
    os.makedirs(outdir, exist_ok=True)
    # write the original filenames (and full path) to a text file, in the same order as the output sorted and numbered wavs.
    # Basically a reference table.
    # If sample library is organized, by drum machine for example, this text file is useful for looking up the instrument or drum name of samples.
    with open(os.path.join(outdir, "original_filenames.txt"), "w") as output:
        for orig_wav in list_of_dir_wavs:
            output.write(str(os.path.join(parent_dir, orig_wav)) + '\n')
    for i, source_wav in enumerate(list_of_dir_wavs):
        # add leading zeros to the output filename, which is a number (corresponding to leaf-order)
        # followed by the original name of the wav.
        out_filename = str(i).zfill(5) + '_' + source_wav.replace('/', '_')
        copy(os.path.join(parent_dir, source_wav), os.path.join(outdir, out_filename))


def _ward_leaf_order(df_matrix):
    """Fill NaNs in df_matrix with 0 (in place), Ward-cluster its columns and return (Z, column names in leaf order)."""
    # Replace missing data.
    # This is necessary for files that aren't long enough to have features for all the time frames.
    # Fills NaNs with zero. Zero is pretty arbitrary.
    df_matrix.fillna(0, inplace=True)
    feature_matrix = df_matrix.T.values  # one row per wav, one column per feature.
    Z = ward(pdist(feature_matrix))
    ll = list(leaves_list(Z))
    return Z, df_matrix.columns[ll]


# a few tests for the BPB Cassette 909 data set.
# a 'sanity check' or positive control.
# If the clustering is working, all snares should be clustered together, all cymbals should cluster together, etc.

def save_test_909_data():
    """
    Saves pandas df of features for all 909 samples as 'data/drums_and_features.csv'.
    """
    bp909_wavs = sorted(glob.glob(_BPB_909_PARENT_DIR + "clean/*.wav"))
    print(bp909_wavs)
    out_matrix = get_features_frame(bp909_wavs, 2)
    # to_csv does not create missing directories.
    os.makedirs('data', exist_ok=True)
    out_matrix.to_csv('data/drums_and_features.csv', index=None)
    print(out_matrix)


def cluster_test_909_data():
    """
    Extends the test above to perform hierarchical clustering. Also saves renamed copies of the wav files
    (numbered in dendrogram leaf order) and a plotted dendrogram in the directory '909_test'.
    """
    outdir = '909_test'
    bp909_wavs = sorted(glob.glob(_BPB_909_PARENT_DIR + "clean/*.wav"))
    df_matrix = get_features_frame(bp909_wavs, 2)
    Z, drumnames = _ward_leaf_order(df_matrix)
    print(drumnames)
    # The copies must be numbered in leaf order, otherwise neighbouring files are not similar.
    save_ordered_wav_copies(_BPB_909_PARENT_DIR, list_of_dir_wavs=['clean/' + x for x in drumnames], outdir=outdir)
    # For labels, use the ordering of the dataframe columns, NOT the order in the leaves_list.
    # left means the roots are on the left, rather than the top.
    dendrogram(Z, labels=['clean/' + x for x in df_matrix.columns], orientation='left')
    plt.savefig(os.path.join(outdir, 'dendrogram.png'))
    plt.close()


def pickle_object(obj, outdir, out_name):
    """Pickle obj to the file out_name inside outdir."""
    with open(os.path.join(outdir, out_name), 'wb') as f:
        pickle.dump(obj, f)


def cluster_and_save_order(globbed_wav_list, n_frames, parent_dir, outdir):
    """
    This is the function for hierarchically clustering wav files and saving them with renamed files, sorted by similarity.

    globbed_wav_list: list of paths to the wavs. Each wav is identified as 'dir_name/sample_name.wav', so it must sit
        exactly one directory below parent_dir for the copy step to find it.
    n_frames: number of leading time frames to extract features from.
    parent_dir: directory that contains the sample directories.
    outdir: directory that receives the numbered copies, 'original_filenames.txt', 'ward_linkage.pkl',
        'df_matrix.pkl' and 'dendrogram.png'.

    Raises FileNotFoundError if fewer than 3 wavs are given and ValueError if fewer than 2 of them yield features.
    """
    n_samples_in_glob = len(globbed_wav_list)
    if n_samples_in_glob < 3:
        raise FileNotFoundError(
            f'Only {n_samples_in_glob} wav file(s) in input list: {globbed_wav_list}. Not enough to cluster.'
        )
    df_matrix = get_features_frame(globbed_wav_list, n_frames, include_parent_dir=True)
    if df_matrix.shape[1] < 2:
        raise ValueError('Fewer than 2 wav files were long enough to extract features. Not enough to cluster.')
    Z, drumnames = _ward_leaf_order(df_matrix)
    # saves renamed, copies of the wav files, sorted by similarity.
    # also saves the original wav file names in the order.
    save_ordered_wav_copies(parent_dir, list_of_dir_wavs=drumnames, outdir=outdir)
    # Save the clustering result so it can be used later for grouping.
    pickle_object(Z, outdir, 'ward_linkage.pkl')
    # Save the dataframe.
    print('Writing feature matrix.')
    pickle_object(df_matrix, outdir, 'df_matrix.pkl')
    # NOTE(review): this csv goes to the current working directory, not to outdir, so successive runs overwrite it.
    df_matrix.to_csv('all_features.csv', index=False)
    # For labels, use the ordering of the dataframe columns, NOT the order in the leaves_list.
    # left means the roots are on the left, rather than the top.
    dendrogram(Z, labels=list(df_matrix.columns), orientation='left')
    # save plot in the out dir.
    plt.savefig(os.path.join(outdir, 'dendrogram.png'))
    plt.close()


# the following functions compartmentalize some experiments.
# They focus on either a single drum machine (Korg Minipops), a single manufacturer (Korg / Elektron), the entire kb6 collection.
# In the glob patterns, '[[]' and '[]]' match the literal '[' and ']' of the directory names.

def get_minipops():
    """Cluster the Korg Minipops samples into 'minipops_2frames'."""
    minipops_wavs = sorted(glob.glob(_KB6_PARENT_DIR + '[[]KB6[]]_Korg_Minipops/*.wav'))
    cluster_and_save_order(minipops_wavs, 2, parent_dir=_KB6_PARENT_DIR, outdir='minipops_2frames')


def get_all_elektron():
    """Cluster the samples of all '[KB6]_Electron*' directories into 'elektron_2frames'."""
    elektron_wavs = sorted(glob.glob(_KB6_PARENT_DIR + '[[]KB6[]]_Electron*/*.wav'))
    cluster_and_save_order(elektron_wavs, 2, parent_dir=_KB6_PARENT_DIR, outdir='elektron_2frames')


def get_all_korg():
    """Cluster the samples of all '[KB6]_Korg*' directories into 'korg_2frames'."""
    wavs = sorted(glob.glob(_KB6_PARENT_DIR + '[[]KB6[]]_Korg*/*.wav'))
    cluster_and_save_order(wavs, 2, parent_dir=_KB6_PARENT_DIR, outdir='korg_2frames')


def get_all_kb6():
    """Cluster the samples of every '[KB6]_*' directory into 'all_2frames'."""
    wavs = sorted(glob.glob(_KB6_PARENT_DIR + '[[]KB6[]]_*/*.wav'))
    cluster_and_save_order(wavs, 2, parent_dir=_KB6_PARENT_DIR, outdir='all_2frames')


def get_909_test():
    """Cluster the BPB Cassette 909 samples into 'test_clustering_909s'."""
    wavs = sorted(glob.glob(_BPB_909_PARENT_DIR + "clean/*.wav"))
    cluster_and_save_order(wavs, 2, parent_dir=_BPB_909_PARENT_DIR, outdir='test_clustering_909s')