├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── annotations └── .gitignore ├── automix ├── __init__.py ├── config.py ├── featureExtraction │ ├── __init__.py │ ├── automaticDrumsTranscription │ │ ├── __init__.py │ │ ├── adtLibProxy.py │ │ └── madmomDrumsProxy.py │ ├── beats │ │ ├── __init__.py │ │ └── madmomBeatDetection.py │ ├── danceability │ │ ├── __init__.py │ │ └── essentiaDanceability.py │ ├── estimator.py │ ├── harmonicPercussiveClassification │ │ ├── __init__.py │ │ ├── classification.py │ │ ├── hpss.py │ │ ├── io.py │ │ └── parameters.json │ ├── key │ │ ├── __init__.py │ │ ├── edmkeyProxy.py │ │ ├── key.py │ │ └── parameters.json │ ├── lowLevel │ │ ├── __init__.py │ │ ├── coreFinder.py │ │ ├── cqt.py │ │ ├── normalize.py │ │ ├── onsetDetection.py │ │ ├── pcp.py │ │ ├── peakPicking.py │ │ ├── peakSelection.py │ │ ├── periodicity.py │ │ ├── quantize.py │ │ ├── readFile.py │ │ ├── replayGain.py │ │ └── windowing.py │ ├── melody │ │ ├── __init__.py │ │ └── melodiaProxy.py │ ├── novelty │ │ ├── __init__.py │ │ ├── checkerboard.py │ │ ├── derivative.py │ │ ├── olda.py │ │ ├── spectralClustering.py │ │ └── structuralFeatures.py │ ├── structure │ │ ├── __init__.py │ │ ├── eval.py │ │ ├── msafProxy.py │ │ ├── salientPointDetection.py │ │ ├── spectralClustering.py │ │ └── spectralClusteringSegmenter.py │ └── vocalSeparation │ │ ├── __init__.py │ │ ├── librosaVocalSeparation.py │ │ └── vocalMelodyExtraction.py ├── model │ ├── __init__.py │ ├── classes │ │ ├── __init__.py │ │ ├── deck.py │ │ ├── lazyProperty.py │ │ ├── point.py │ │ ├── signal.py │ │ └── track.py │ ├── eval │ │ ├── __init__.py │ │ └── cueEval.py │ └── inputOutput │ │ ├── __init__.py │ │ ├── downloader │ │ ├── __init__.py │ │ ├── downloaders.py │ │ ├── mixesDBProxy.py │ │ └── oneThousandOnetracklistsProxy.py │ │ ├── serializer │ │ ├── __init__.py │ │ ├── dbSerializer.py │ │ ├── featureSerializer.py │ │ ├── graphvizSerializer.py │ │ ├── jamsSerializer.py │ │ ├── reaperProxy.py │ │ ├── serializer.py │ │ ├── sonification.py │ │ ├── traktorSerializer.py │ │ ├── veireerializer.py │ │ └── xmlSerializer.py │ │ ├── template │ │ ├── __init__.py │ │ ├── item.template.RPP │ │ ├── project.template.RPP │ │ └── track.template.RPP │ │ └── trackBuilder.py ├── rules │ ├── __init__.py │ ├── activityRule.py │ ├── eventsAlignmentRule.py │ ├── harmonicTransitionRule.py │ ├── lengthRule.py │ ├── maxPlayrateChangeRule.py │ ├── percussiveTransitionRule.py │ ├── rule.py │ ├── suitableKeyRule.py │ ├── transitionRule.py │ └── veireTransitionsRule.py ├── tests │ └── testFeatureExtraction.py ├── transition │ ├── __init__.py │ └── ruleDriven.py └── utils │ ├── __init__.py │ ├── perceptiveLoudness │ ├── __init__.py │ └── loudness.py │ └── quantization.py ├── bin └── mainCues.py ├── requirements.txt ├── setup.py └── vendors └── install dep.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Basic files to remove 2 | *.reapeaks 3 | *.wav 4 | *.wave 5 | *.mp3 6 | *.mp4 7 | *.webm 8 | *.pyc 9 | *.RPP 10 | *.RPP-bak 11 | *.pdf 12 | __pycache__ 13 | annotations/* 14 | 15 | # Vendors 16 | vendors/* 17 | 18 | # VSCode conf files 19 | .vscode 20 | .features_msaf_tmp.json 21 | .pylintrc 22 | .style.yapf 23 | .~lock.* 24 | setup.cfg 25 | 26 | # debugging files 27 | myMain.py 28 | restats 29 | tests/cue finding 30 | annotations/graph 31 | 32 | #compiled files 33 | *.egg-info 34 | -------------------------------------------------------------------------------- /.gitmodules: 
-------------------------------------------------------------------------------- 1 | [submodule "vendors/edmkey"] 2 | path = vendors/edmkey 3 | url = https://github.com/angelfaraldo/edmkey.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Mickael Zehren 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Automix 2 | 3 | Automatic DJ-mixing of tracks 4 | 5 | ## Install 6 | 7 | Clone or download the repository, then run from within the folder: 8 | 9 | pip install . 10 | 11 | Or to keep the project editable, use: 12 | 13 | pip install --editable . 14 | 15 | ## Dependencies 16 | 17 | Installing the project with pip should download all the dependencies except Richard Vogl's drums transcription: 18 | it is a fork of madmom and has to be installed in a separate environment to keep both libraries accessible. 19 | 20 | cd vendors 21 | # python3 -m venv madmomDrumsEnv doesn't work for the installation; keeping python2 for now 22 | virtualenv madmomDrumsEnv 23 | 24 | # install the dependencies 25 | madmomDrumsEnv/bin/pip install numpy 26 | madmomDrumsEnv/bin/pip install scipy 27 | madmomDrumsEnv/bin/pip install cython 28 | madmomDrumsEnv/bin/pip install nose 29 | # pyaudio might fail to build without the Python headers 30 | sudo apt-get install python-dev 31 | madmomDrumsEnv/bin/pip install pyaudio 32 | 33 | # install from the source `http://www.ifs.tuwien.ac.at/~vogl/models/mirex-17.zip` 34 | wget http://www.ifs.tuwien.ac.at/~vogl/models/mirex-18.tar.gz 35 | tar -xvzf mirex-18.tar.gz 36 | cd madmom-0.16.dev0/ 37 | # --user seems not available anymore 38 | ../madmomDrumsEnv/bin/python setup.py develop 39 | # check that it works 40 | cd ..
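# Example (hedged): transcribing a single file, using the default model name that
# automix's MadmomDrumsProxy passes to this CLI; the audio path is a placeholder.
madmomDrumsEnv/bin/python madmom-0.16.dev0/bin/DrumTranscriptor -m CRNN_3 single path/to/track.wav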
41 | madmomDrumsEnv/bin/python madmom-0.16.dev0/bin/DrumTranscriptor 42 | #clean everything 43 | rm mirex-18.tar.gz 44 | 45 | ## Usage 46 | from automix.model.classes.track import Track 47 | track = Track(path="path to audio file") 48 | cues = track.getCueIns() 49 | times = cues.times 50 | confidences = cues.values -------------------------------------------------------------------------------- /annotations/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore -------------------------------------------------------------------------------- /automix/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /automix/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | TODO: Move the functions to the correct location 3 | """ 4 | import logging as log 5 | import os 6 | 7 | DATASET_LOCATION = "/home/mickael/Documents/programming/dj-tracks-switch-points/" 8 | CACHE_LOCATION = "../annotations/" 9 | CACHE_LEVEL = 0 10 | LOG_LEVEL = log.DEBUG 11 | 12 | log.getLogger().setLevel(LOG_LEVEL) 13 | 14 | 15 | def k_fold_split(X, Y, k=10, shuffleDataset=True): 16 | """ 17 | Split both list X and Y into k folds 18 | random will shuffle the data before, so two calls would not return the same folds 19 | 20 | ex: print(k_fold_split(["A", "B", "C", "D", "E", "F", "G"], ["a", "b", "c", "d", "e", "f", "g"], k=3, shuffleDataset=0)) 21 | [[('A', 'a'), ('B', 'b')], [('C', 'c'), ('D', 'd')], [('E', 'e'), ('F', 'f'), ('G', 'g')]] 22 | """ 23 | from random import shuffle 24 | 25 | assert len(X) == len(Y) and k <= len(X) 26 | 27 | def chunkIt(seq, num): 28 | avg = len(seq) / float(num) 29 | out = [] 30 | last = 0.0 31 | 32 | while last < len(seq): 33 | out.append(seq[int(last):int(last + avg)]) 34 | last += avg 35 | 36 | return out 37 | 38 | indexes = list(range(len(X))) 39 | if shuffleDataset: 40 | shuffle(indexes) 41 | 42 | foldsIndexes = chunkIt(indexes, k) 43 | folds = [[(X[i], Y[i]) for i in foldIndexes] for foldIndexes in foldsIndexes] 44 | return folds 45 | 46 | 47 | def _getFilename(path): 48 | file, ext = os.path.splitext(os.path.basename(path)) 49 | if ext != ".mp3" and ext != ".jams": # in case that we give a file without ext but still contain a "." 
in the name 50 | return file + ext 51 | else: 52 | return file 53 | 54 | 55 | def _getFileType(path): 56 | """ 57 | return the type of the file based on the path's extension 58 | i.e.: 'MP3' or 'WAVE' 59 | """ 60 | ext = path.split("/")[-1].split(".")[-1] 61 | if ext == "mp3": 62 | return 'MP3' 63 | if ext == "wav": 64 | return "WAVE" 65 | if ext == "jams": 66 | return "JAMS" 67 | else: 68 | return ext 69 | 70 | 71 | def getFolderFiles(directory): 72 | """ 73 | return the paths of the supported files located in this folder 74 | """ 75 | paths = sorted(os.listdir(directory)) 76 | knownTypes = ["MP3", "WAVE", "mp4", "m4a", "JAMS"] 77 | return [os.path.join(directory, path) for path in paths if _getFileType(path) in knownTypes] 78 | 79 | 80 | def GET_PAOLO_FULL(checkCompletude=True, sets=["paolo1", "paolo2", "paolo3", "paolo4", "paolo5", "paolo6", "paolo7"]): 81 | """ 82 | return the paths of the audio files (.mp3) and the annotation files (.jams) 83 | if checkCompletude is True, discard the tracks without annotations and the annotations without tracks 84 | """ 85 | tracksPaths = [] 86 | for set in sets: 87 | tracksPaths += getFolderFiles(DATASET_LOCATION + str(set) + "/audio/") 88 | 89 | gtTrackPaths = getFolderFiles(DATASET_LOCATION + "clean/annotations/") 90 | if checkCompletude: 91 | tracksPaths, gtTrackPaths = CHECK_COMPLETUDE(tracksPaths, gtTrackPaths) 92 | return tracksPaths, gtTrackPaths 93 | 94 | 95 | def CHECK_COMPLETUDE(tracksPaths, gtTrackPaths): 96 | """ 97 | Check that all the files are annotated and that each annotation has a file 98 | """ 99 | tracksPaths = sorted(tracksPaths, key=lambda x: _getFilename(x)) 100 | gtTrackPaths = sorted(gtTrackPaths, key=lambda x: _getFilename(x)) 101 | 102 | newTracksPaths = [track for track in tracksPaths if _getFilename(track) in [_getFilename(t) for t in gtTrackPaths]] 103 | newgtTrackPaths = [track for track in gtTrackPaths if _getFilename(track) in [_getFilename(t) for t in tracksPaths]] 104 | 105 | if len(newTracksPaths) != len(tracksPaths): 106 | log.info(("Be careful: not all the tracks are annotated", len(newTracksPaths), len(tracksPaths))) 107 | log.debug("\n".join( 108 | [track for track in tracksPaths if _getFilename(track) not in [_getFilename(t) for t in gtTrackPaths]])) 109 | log.debug("\n".join( 110 | [track for track in gtTrackPaths if _getFilename(track) not in [_getFilename(t) for t in tracksPaths]])) 111 | 112 | return newTracksPaths, newgtTrackPaths 113 | -------------------------------------------------------------------------------- /automix/featureExtraction/__init__.py: -------------------------------------------------------------------------------- 1 | from .estimator import Estimator, Parameter 2 | -------------------------------------------------------------------------------- /automix/featureExtraction/automaticDrumsTranscription/__init__.py: -------------------------------------------------------------------------------- 1 | from .madmomDrumsProxy import MadmomDrumsProxy 2 | -------------------------------------------------------------------------------- /automix/featureExtraction/automaticDrumsTranscription/adtLibProxy.py: -------------------------------------------------------------------------------- 1 | # from ADTLib import ADT 2 | 3 | 4 | 5 | def getOnsets(path): 6 | """ 7 | return drum onsets in seconds computed by ADTLib (see https://github.com/CarlSouthall/ADTLib) 8 | 9 | return type : {"snare":[0.0,0.0,0.0],"hihat":[0.0,0.0,0.0],"kick":[0.0,0.0,0.0]} 10 | """ 11 | pass 12 | # firstResult = dict(ADT([path])[0]) 13 | # return {key: value.tolist()
for key, value in firstResult.items()} 14 | 15 | 16 | -------------------------------------------------------------------------------- /automix/featureExtraction/automaticDrumsTranscription/madmomDrumsProxy.py: -------------------------------------------------------------------------------- 1 | import logging as log 2 | import subprocess 3 | 4 | import numpy as np 5 | from pkg_resources import resource_filename 6 | 7 | from automix.featureExtraction.estimator import Estimator, Parameter 8 | from automix.model.classes.signal import Signal 9 | 10 | 11 | class MadmomDrumsProxy(Estimator): 12 | """ 13 | call madmom version http://ifs.tuwien.ac.at/~vogl/dafx2018/ from Richard Vogl 14 | returns the time stamp of drum events. 15 | the drums id are : 16 | 0: kick 17 | 1: snare ? 18 | 2: hi hat ? 19 | """ 20 | 21 | def __init__(self, 22 | parameterModel="CRNN_3", 23 | inputPath="path", 24 | outputKick="kick", 25 | outputSnare="snare", 26 | outputHihat="hihat", 27 | cachingLevel=0, 28 | forceRefreshCache=False): 29 | super().__init__() 30 | self.parameters = {"model": Parameter(parameterModel)} 31 | self.inputs = [inputPath] 32 | self.outputs = [outputKick, outputSnare, outputHihat] 33 | self.cachingLevel = cachingLevel 34 | self.forceRefreshCache = forceRefreshCache 35 | 36 | def predictOne(self, path: str): 37 | # TODO: Is it possible to install both version of madmom ? 38 | # args = ["ls", "-l"] 39 | args = [ 40 | resource_filename(__name__, "../../../vendors/madmomDrumsEnv/bin/python"), 41 | resource_filename(__name__, "../../../vendors/madmom-0.16.dev0/bin/DrumTranscriptor"), "-m", 42 | self.parameters["model"].value, "single", path 43 | ] # Calling python from python, Yay... 44 | process = subprocess.Popen(args, stdout=subprocess.PIPE) 45 | output = process.stdout.read().decode() 46 | 47 | # TODO read stderr=subprocess.STDOUT 48 | # err = process.stderr.read().decode() 49 | # if err: 50 | # log.error(err) 51 | 52 | result = [event.split("\t") for event in output.split("\n") if event] 53 | result = [row for row in result if len(row) == 2 and self.is_number(row[0]) and self.is_number(row[1])] 54 | kicks = [float(row[0]) for row in result if row[1] == "35" or row[1] == "0"] 55 | snares = [float(row[0]) for row in result if row[1] == "38" or row[1] == "1"] 56 | hihats = [float(row[0]) for row in result if row[1] == "42" or row[1] == "2"] 57 | 58 | return (Signal(np.ones(len(kicks)), times=kicks, 59 | sparse=True), Signal(np.ones(len(snares)), times=snares, 60 | sparse=True), Signal(np.ones(len(hihats)), times=hihats, sparse=True)) 61 | 62 | def is_number(self, s): 63 | try: 64 | float(s) 65 | return True 66 | except ValueError: 67 | return False 68 | -------------------------------------------------------------------------------- /automix/featureExtraction/beats/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This package includes beat tracking and tempo finding algorithms 3 | """ 4 | from .madmomBeatDetection import MadmomBeatDetection -------------------------------------------------------------------------------- /automix/featureExtraction/beats/madmomBeatDetection.py: -------------------------------------------------------------------------------- 1 | """ 2 | container for the downbeat estimator implemented with 3 | """ 4 | # encoding: utf-8 5 | import madmom 6 | import numpy as np 7 | 8 | from automix.featureExtraction.estimator import Estimator, Parameter 9 | from automix.utils import quantization 10 | from 
automix.model.classes.signal import Signal 11 | import logging 12 | 13 | 14 | class MadmomBeatDetection(Estimator): 15 | """ 16 | Compute the beat, downbeat, tempo of a track 17 | 18 | Parameters: 19 | - path (string): location of the track 20 | - parameterSnapDistance=0.05: Distance threshold at which the actual beat location is discarded and the 21 | expected beat location from the tempo is used. 22 | - parameterTransitionLambda=300, default to 100 in madmom. Controls the probability that the DBN hidden state transition to another tempo 23 | - parameterCorrectToActivation=True 24 | """ 25 | 26 | def __init__( 27 | self, 28 | parameterSnapDistance=0.05, 29 | parameterTransitionLambda=300, 30 | parameterCorrectToActivation=True, 31 | inputPath="path", 32 | outputBeats="beats", 33 | outputdownbeats="downbeats", 34 | outputStrongBeats="strongBeats", 35 | outputTempo="tempo", 36 | cachingLevel=0, 37 | forceRefreshCache=False): 38 | self.parameters = { 39 | "snapDistance": Parameter(parameterSnapDistance), 40 | "transitionLambda": Parameter(parameterTransitionLambda), 41 | "correctToActivation": Parameter(parameterCorrectToActivation) 42 | } 43 | self.inputs = [inputPath] 44 | self.outputs = [outputBeats, outputdownbeats, outputStrongBeats, outputTempo] 45 | self.cachingLevel = cachingLevel 46 | self.forceRefreshCache = forceRefreshCache 47 | 48 | def plot(self, beats): 49 | import matplotlib.pyplot as plt 50 | times = beats[:, 0] 51 | plt.plot(np.diff(times)) 52 | plt.show() 53 | 54 | def predictOne(self, path): 55 | 56 | # call madmom to get beats 57 | fps = 100 58 | act = madmom.features.RNNDownBeatProcessor()(str(path)) 59 | proc = madmom.features.DBNDownBeatTrackingProcessor(beats_per_bar=[3, 4], 60 | fps=fps, 61 | transition_lambda=self.parameters["transitionLambda"].value, 62 | correct=self.parameters["correctToActivation"].value) 63 | beats = proc(act) 64 | if len([beat for i, beat in enumerate(beats) if (i + beats[0][1] - 1) % 4 + 1 != beat[1]]): 65 | logging.error("Beat detection skipped a beat") 66 | # get the tempo 67 | # evenGrids = quantization.separateInEvenGrids(beats[:, 0], regularityThreshold=self.parameters["snapDistance"].value) 68 | # longuestEvenGridIndex = np.argmax([len(grid) for grid in evenGrids]) 69 | # tau = np.average([(evenGrid[-1] - evenGrid[0]) / (len(evenGrid) - 1) for evenGrid in evenGrids if len(evenGrid) > 1], 70 | # weights=[len(evenGrid) 71 | # for evenGrid in evenGrids if len(evenGrid) > 1]) * fps # TODO: use only the longest portion ? 72 | # tempo = 60 * fps / tau 73 | # beatLength = tau / fps # i.e 0.5s 74 | # refBeat = [beat for beat in beats if beat[0] == evenGrids[longuestEvenGridIndex][0]][0] 75 | 76 | # # extend the grid of beats to remove holes in it 77 | # trackLength = float(len(act)) / fps 78 | # extendedBeats = quantization.extendGrid(refBeat, 79 | # beats, 80 | # trackLength, 81 | # beatLength, 82 | # SnapDistance=self.parameters["snapDistance"].value) 83 | tempo = 60 / np.mean(np.diff(np.array(beats)[:, 0])) 84 | 85 | # Get the confidence as the mean of the activation at each GT beat. 
Sums the two outputs of the NN 86 | # beat = self._getConfidence(act, beat, fps, extendedBeats) 87 | beatsT = [beat[0] for beat in beats] 88 | downbeatsT = [beat[0] for beat in beats if beat[1] == 1] 89 | strongBeatsT = [beat[0] for beat in beats if beat[1] == 1 or beat[1] == 3] 90 | return (Signal(np.ones(len(beatsT)), times=beatsT, 91 | sparse=True), Signal(np.ones(len(downbeatsT)), times=downbeatsT, 92 | sparse=True), Signal(np.ones(len(strongBeatsT)), times=strongBeatsT, 93 | sparse=True), tempo) 94 | 95 | def _getConfidence(self, act, fps, extendedBeats): 96 | # Get the confidence as the mean of the activation at each GT beat. Sums the two outputs of the NN 97 | activationPerBeat = [np.sum(act[int(beat * fps)]) for beat in np.array(extendedBeats)[:, 0]] 98 | 99 | beatConfidence = np.mean(activationPerBeat) 100 | 101 | # get the confidence between beats 102 | beatSamples = [int(beat * fps) for beat in np.array(extendedBeats)[:, 0]] 103 | beatSamples = np.append([[sample - 1, sample, sample + 1] for sample in beatSamples], []) 104 | activationBetweenBeats = [np.sum(act[i]) for i in range(len(act)) if i in beatSamples] 105 | interBeatConfidence = np.mean([np.sum(act[i]) for i in range(len(act)) if i not in beatSamples]) 106 | return beatConfidence, interBeatConfidence, activationBetweenBeats 107 | -------------------------------------------------------------------------------- /automix/featureExtraction/danceability/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MZehren/Automix/dfaa00a9e7c5c0938c0a9d275c07f3a3e5f87e43/automix/featureExtraction/danceability/__init__.py -------------------------------------------------------------------------------- /automix/featureExtraction/danceability/essentiaDanceability.py: -------------------------------------------------------------------------------- 1 | """ 2 | container for the ReadFile estimator 3 | """ 4 | import essentia.standard as estd 5 | 6 | from featureExtraction.estimator import Estimator 7 | from model.classes.signal import Signal 8 | 9 | 10 | class EssentiaDanceability(Estimator): 11 | """ 12 | estimator infering the danceability of a track from the samples 13 | The signal is split in function of the grid 14 | """ 15 | 16 | def __init__(self, inputGrid="downbeats"): 17 | self.parameters = {} 18 | self.inputs = ["samples", inputGrid] 19 | self.outputs = ["danceability", "dfa"] 20 | self.cachingLevel = 0 21 | 22 | def predictOne(self, samples, grid): 23 | myDanceability = estd.Danceability() 24 | danceabilityDfaList = [myDanceability(samples.getValues( 25 | grid[i], grid[i+1])) for i in range(len(grid)-1)] 26 | 27 | return (Signal([danceabilityDfa[0] for danceabilityDfa in danceabilityDfaList], times=grid[:-1], duration=samples.duration), 28 | Signal([danceabilityDfa[1]for danceabilityDfa in danceabilityDfaList], times=grid[:-1], duration=samples.duration)) 29 | -------------------------------------------------------------------------------- /automix/featureExtraction/harmonicPercussiveClassification/__init__.py: -------------------------------------------------------------------------------- 1 | from .hpss import Hpss -------------------------------------------------------------------------------- /automix/featureExtraction/harmonicPercussiveClassification/hpss.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import librosa 4 | import numpy as np 5 | 6 | from 
automix.featureExtraction.estimator import Estimator 7 | from automix.model.classes.signal import Signal 8 | 9 | 10 | class Hpss(Estimator): 11 | """ 12 | Estimator computing the hpss of given audio. 13 | """ 14 | 15 | def __init__(self, 16 | inputSamples="samples", 17 | outputHarmonic="harmonic", 18 | outputPercussive="percussive", 19 | cachingLevel=2, 20 | forceRefreshCache=False): 21 | super().__init__() 22 | self.inputs = [inputSamples] 23 | self.outputs = [outputHarmonic, outputPercussive] 24 | self.cachingLevel = cachingLevel 25 | self.forceRefreshCache = forceRefreshCache 26 | 27 | def predictOne(self, samples: Signal) -> List[Signal]: 28 | """ 29 | Computes the hpss of the given audio using librosa. 30 | """ 31 | 32 | y_harmonic, y_percussive = librosa.effects.hpss(samples.values) 33 | 34 | return (Signal(np.array(y_harmonic), sampleRate=samples.sampleRate), 35 | Signal(np.array(y_percussive), sampleRate=samples.sampleRate)) 36 | -------------------------------------------------------------------------------- /automix/featureExtraction/harmonicPercussiveClassification/io.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from model.classes.signal import Signal 6 | 7 | 8 | def readAnnotations(pathCSV, downbeats, delimiter=","): 9 | """Reads in harmonic and percussive annotations from a csv file for a song. 10 | 11 | Args: 12 | pathCSV (str): The path to the csv file containing annotations regarding 13 | harmonicness and percussiveness of the segments of the song. 14 | downbeats (list of float): The times where downbeats occur in the song. 15 | delimiter (str): The delimiter used in the csv file. 16 | 17 | Returns: 18 | tuple of Signal: A harmonic and a percussive signal containing the 19 | annotations for each bar of the song. 20 | 21 | """ 22 | 23 | # Parse annotations from csv file 24 | def convertTime(timeStr): 25 | try: 26 | timeObj = dt.datetime.strptime(timeStr, "%M:%S") 27 | except ValueError: 28 | timeObj = dt.datetime.strptime(timeStr, "%S") 29 | return dt.timedelta( 30 | minutes=timeObj.minute, seconds=timeObj.second).total_seconds() 31 | 32 | annotationsDataFrame = pd.read_csv( 33 | pathCSV, 34 | sep=delimiter, 35 | converters={ 36 | 0: convertTime, 37 | 1: convertTime, 38 | 2: int, 39 | 3: int 40 | }, 41 | skipinitialspace=True) 42 | annotations = annotationsDataFrame.values 43 | 44 | # Convert annotations for segments to annotations for bars 45 | harmonicAnnotated = [] 46 | percussiveAnnotated = [] 47 | for i in range(len(downbeats) - 1): 48 | barStart = downbeats[i] 49 | barEnd = downbeats[i + 1] 50 | candidate = (0, False, False) # overlap, harmonic, percussive 51 | for segmentStart, segmentEnd, percussive, harmonic in \ 52 | annotations[:, :4]: 53 | overlap = min(barEnd, segmentEnd) - max(barStart, segmentStart) 54 | if overlap > candidate[0]: 55 | candidate = (overlap, harmonic, percussive) 56 | harmonicAnnotated.append(candidate[1]) 57 | percussiveAnnotated.append(candidate[2]) 58 | 59 | return Signal(harmonicAnnotated, times=downbeats[:-1]), \ 60 | Signal(percussiveAnnotated, times=downbeats[:-1]) 61 | 62 | 63 | def writeFittingResults(pathCSV, matrix, header, delimiter=","): 64 | """Writes a result matrix of scores and parameter values to a csv file. 65 | 66 | Args: 67 | pathCSV (str): Path to the output csv file. 68 | matrix (2d np.array): The result matrix. 69 | header (list of str): The headers for the columns of the csv file. 
70 | delimiter (str): The delimiter used in the csv file. 71 | 72 | """ 73 | 74 | dataFrame = pd.DataFrame(matrix) 75 | dataFrame.to_csv( 76 | pathCSV, 77 | sep=delimiter, 78 | float_format="%.2f", # doesn't work? 79 | header=header, 80 | index=False) -------------------------------------------------------------------------------- /automix/featureExtraction/harmonicPercussiveClassification/parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "numFFTBinsAt44100Hz": 4096, 3 | "medianWidthHarmonic": 31, 4 | "medianWidthPercussive": 31, 5 | "marginHarmonic": 8, 6 | "marginPercussive": 2, 7 | "thresholdHarmonic": -14.2, 8 | "thresholdPercussive": -10.1, 9 | "measureHarmonic": "weightedAccuracy", 10 | "measurePercussive": "weightedAccuracy", 11 | "_fit_": { 12 | "marginHarmonic": [ 13 | 1, 14 | 2, 15 | 4, 16 | 8, 17 | 16 18 | ], 19 | "marginPercussive": [ 20 | 1, 21 | 2, 22 | 4, 23 | 8, 24 | 16 25 | ] 26 | } 27 | } -------------------------------------------------------------------------------- /automix/featureExtraction/key/__init__.py: -------------------------------------------------------------------------------- 1 | from .key import Key -------------------------------------------------------------------------------- /automix/featureExtraction/key/edmkeyProxy.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module is an adaptor for edmkey (https://github.com/angelfaraldo/edmkey). 3 | """ 4 | 5 | import os 6 | 7 | # edmkey is not a package. adjusting PYTHONPATH is necessary 8 | 9 | try: 10 | from vendors.edmkey import edmkey, evaluation 11 | except ImportError: 12 | raise ImportError( 13 | "Couldn't import edmkey. The edmkey repository might be missing. Use\n" 14 | "git submodule init\n" 15 | "git submodule update.") 16 | 17 | 18 | def setParameters(parameters): 19 | """Sets the constants in edmkey.py according to the provided parameter 20 | dictionary. 21 | 22 | Args: 23 | parameters: The dictionary containint the parameters. 24 | 25 | Sets: 26 | The constants in edmkey.py (too many to list explicitly). 
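    Example (a hedged sketch, assuming the parameters.json file next to this
    module and the Parameter class from automix.featureExtraction.estimator):

        import json
        from pkg_resources import resource_filename
        from automix.featureExtraction.estimator import Parameter

        with open(resource_filename(__name__, "parameters.json")) as handle:
            params = {name: Parameter(value) for name, value in json.load(handle).items()}
        setParameters(params)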
27 | 28 | """ 29 | 30 | def getValue(key): 31 | return parameters[key].value 32 | 33 | # file settings 34 | edmkey.SAMPLE_RATE = getValue("sampleRate") 35 | edmkey.VALID_FILE_TYPES = set(getValue("validFileTypes")) 36 | # analysis parameters 37 | edmkey.HIGHPASS_CUTOFF = getValue("highpassCutoff") 38 | edmkey.SPECTRAL_WHITENING = getValue("spectralWhitening") 39 | edmkey.DETUNING_CORRECTION = getValue("detuningCorrection") 40 | edmkey.DETUNING_CORRECTION_SCOPE = getValue("detuningCorrectionScope") 41 | edmkey.PCP_THRESHOLD = getValue("PCPThreshold") 42 | edmkey.WINDOW_SIZE = getValue("windowSize") 43 | edmkey.HOP_SIZE = getValue("hopSize") 44 | edmkey.WINDOW_SHAPE = getValue("windowShape") 45 | edmkey.MIN_HZ = getValue("minHz") 46 | edmkey.MAX_HZ = getValue("maxHz") 47 | edmkey.SPECTRAL_PEAKS_THRESHOLD = getValue("spectralPeaksThreshold") 48 | edmkey.SPECTRAL_PEAKS_MAX = getValue("spectralPeaksMax") 49 | edmkey.HPCP_BAND_PRESET = getValue("HPCPBandPreset") 50 | edmkey.HPCP_SPLIT_HZ = getValue("HPCPSplitHz") 51 | edmkey.HPCP_HARMONICS = getValue("HPCPHarmonics") 52 | edmkey.HPCP_NON_LINEAR = getValue("HPCPNonLinear") 53 | edmkey.HPCP_NORMALIZE = getValue("HPCPNormalize") 54 | edmkey.HPCP_SHIFT = getValue("HPCPShift") 55 | edmkey.HPCP_REFERENCE_HZ = getValue("HPCPReferenceHz") 56 | edmkey.HPCP_SIZE = getValue("HPCPSize") 57 | edmkey.HPCP_WEIGHT_WINDOW_SEMITONES = getValue("HPCPWeightWindowSemitones") 58 | edmkey.HPCP_WEIGHT_TYPE = getValue("HPCPWeightType") 59 | # key detector method 60 | edmkey.KEY_PROFILE = getValue("keyProfile") 61 | edmkey.USE_THREE_PROFILES = getValue("useThreeProfiles") 62 | edmkey.WITH_MODAL_DETAILS = getValue("withModalDetails") 63 | 64 | 65 | def estimateKey(path, separator=" ", tempOutputFile="edmkey.temp"): 66 | """Estimates the key of a given audio file. 67 | 68 | Args: 69 | path (str): The absolute path to the audio file. 70 | separator (str): The separating char between key name and mode. 71 | tempOutputFile (str): The relative or absolute path to a temporary file, 72 | that will be created and directly deleted afterwards. 73 | 74 | Returns: 75 | str: The key of the audio file. 76 | 77 | """ 78 | 79 | # estimate key. create a file with the key as side effect 80 | key = edmkey.estimate_key(path, tempOutputFile) 81 | # delete this file 82 | os.remove(tempOutputFile) 83 | # replace the default separator and return key 84 | return key.replace("\t", separator) 85 | 86 | 87 | def convertKey(keyString): 88 | """Converts a key given as string to a formal representation as pair of 89 | integers (for name and mode of the key). 90 | 91 | Args: 92 | keyString (str): The key as string. 93 | 94 | Returns: 95 | list of int: The formal representation of the key. 96 | 97 | """ 98 | 99 | return evaluation.key_to_list(keyString) 100 | 101 | 102 | def mirexScore(estimation, groundtruth): 103 | """Calculates the mirex score of an estimated key given the correct key. 104 | 105 | Args: 106 | estimation (list of int): The estimated key in formal representation. 107 | groundtruth (list of int): The correct key in formal representation. 108 | 109 | Returns: 110 | float: The mirex score. 
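    Example (a hedged sketch; the key strings follow the "name mode" format that
    estimateKey returns with the default separator, and the audio path is a placeholder):

        estimation = convertKey(estimateKey("/path/to/track.wav"))
        groundtruth = convertKey("A minor")
        print(mirexScore(estimation, groundtruth))  # 1.0 only for an exact match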
111 | 112 | """ 113 | 114 | return evaluation.mirex_score(estimation, groundtruth) -------------------------------------------------------------------------------- /automix/featureExtraction/key/key.py: -------------------------------------------------------------------------------- 1 | from automix.featureExtraction import Estimator, Parameter 2 | import madmom 3 | 4 | 5 | class Key(Estimator): 6 | """ 7 | Estimator calculating the key of a given audio. 8 | """ 9 | 10 | def __init__(self, input="path", output="key", cachingLevel=0, forceRefreshCache=False): 11 | self.inputs = [input] 12 | self.outputs = [output] 13 | self.cachingLevel = cachingLevel 14 | self.forceRefreshCache = forceRefreshCache 15 | self.parameters = {} 16 | 17 | def predictOne(self, path): 18 | """Estimates the key of a given audio file. 19 | 20 | Args: 21 | path (str): The absolute path of the audio file. 22 | 23 | Returns: 24 | tuple of str: The key of the audio file. 25 | 26 | """ 27 | return self.madmomKey(path) 28 | 29 | def madmomKey(self, file): 30 | proc = madmom.features.key.CNNKeyRecognitionProcessor() 31 | proba = proc(file) 32 | key = madmom.features.key.key_prediction_to_label(proba) 33 | return (key, ) 34 | 35 | def edmKey(self): 36 | """ 37 | Deprecated 38 | """ 39 | raise DeprecationWarning() 40 | from featureExtraction.key import edmkeyProxy as edm 41 | self.loadParams() 42 | edm.setParameters(self.parameters) 43 | key = edm.estimateKey(path, self.getValue("separator")) 44 | return (key, ) -------------------------------------------------------------------------------- /automix/featureExtraction/key/parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "sampleRate": 44100, 3 | "validFileTypes": [ 4 | ".wav", 5 | ".mp3", 6 | ".flac", 7 | ".aiff", 8 | ".ogg" 9 | ], 10 | "highpassCutoff": 200, 11 | "spectralWhitening": true, 12 | "detuningCorrection": true, 13 | "detuningCorrectionScope": "average", 14 | "PCPThreshold": 0.2, 15 | "windowSize": 4096, 16 | "hopSize": 4096, 17 | "windowShape": "hann", 18 | "minHz": 25, 19 | "maxHz": 3500, 20 | "spectralPeaksThreshold": 0.0001, 21 | "spectralPeaksMax": 60, 22 | "HPCPBandPreset": false, 23 | "HPCPSplitHz": 250, 24 | "HPCPHarmonics": 4, 25 | "HPCPNonLinear": false, 26 | "HPCPNormalize": "none", 27 | "HPCPShift": false, 28 | "HPCPReferenceHz": 440, 29 | "HPCPSize": 12, 30 | "HPCPWeightWindowSemitones": 1, 31 | "HPCPWeightType": "cosine", 32 | "keyProfile": "bgate", 33 | "useThreeProfiles": true, 34 | "withModalDetails": true, 35 | "separator": " " 36 | } -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/__init__.py: -------------------------------------------------------------------------------- 1 | from .coreFinder import CoreFinder 2 | from .cqt import Cqt 3 | from .normalize import Normalize 4 | from .onsetDetection import OnsetDetection 5 | from .pcp import Pcp 6 | from .peakPicking import PeakPicking 7 | from .peakSelection import PeakSelection 8 | from .periodicity import Periodicity 9 | from .quantize import Quantize 10 | from .readFile import ReadFile, GetDuration 11 | from .replayGain import ReplayGain 12 | from .windowing import Windowing 13 | 14 | -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/coreFinder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Get the peaks exceding a threshold. 
Returns only the maximum value in the threshold exceed 3 | """ 4 | 5 | import sys 6 | from typing import List 7 | 8 | import numpy as np 9 | from scipy.ndimage import filters 10 | 11 | from automix.featureExtraction.estimator import Estimator, Parameter 12 | from automix.model.classes.signal import Signal, SparseSegmentSignal 13 | 14 | 15 | class CoreFinder(Estimator): 16 | def __init__(self, 17 | inputValues="samples", 18 | inputGrid="period", 19 | parameterIncludeBorders=True, 20 | outputPeaks="core", 21 | cachingLevel=0, 22 | forceRefreshCache=False): 23 | """Look at the rms of the signal for a segment and label it as a core if it's abov the rms of the full track 24 | 25 | """ 26 | self.parameters = {"includeBorders": Parameter(parameterIncludeBorders)} 27 | self.inputs = [inputValues, inputGrid] 28 | self.outputs = [outputPeaks] 29 | self.cachingLevel = cachingLevel 30 | self.forceRefreshCache = forceRefreshCache 31 | 32 | def _rms(self, values): 33 | return np.sqrt(np.mean(np.square(values), axis=0)) 34 | 35 | def predictOne(self, values: Signal, grid: Signal): 36 | mean = self._rms(values) 37 | times = grid.times 38 | if self.parameters["includeBorders"].value: 39 | times = [0] + list(times) + [99999] 40 | positionTuples = [(times[i], times[i + 1]) for i in range(len(times) - 1)] 41 | 42 | result = SparseSegmentSignal([self._rms(values.getValues(start, stop)) > mean for start, stop in positionTuples], 43 | [(start, stop) for start, stop in positionTuples]) 44 | return (result, ) 45 | -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/cqt.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import librosa 4 | import numpy as np 5 | 6 | from automix.featureExtraction.estimator import Estimator, Parameter 7 | from automix.model.classes.signal import Signal 8 | 9 | 10 | class Cqt(Estimator): 11 | """ 12 | Estimator calculating the cqt of given audio. 13 | 14 | parameter Scale possible values: 15 | Amplitude 16 | Power 17 | MSAF 18 | Power dB 19 | Perceived dB 20 | """ 21 | 22 | def __init__(self, 23 | parameterHopLength=512, 24 | parameterBinNumber=84, 25 | parameterScale="Power", 26 | inputSamples="samples", 27 | outputCqt="cqt", 28 | cachingLevel=2, 29 | forceRefreshCache=False): 30 | self.parameters = { 31 | "hopLength": Parameter(parameterHopLength), 32 | "binNumber": Parameter(parameterBinNumber), 33 | "scale": Parameter(parameterScale) 34 | } 35 | self.inputs = [inputSamples] 36 | self.outputs = [outputCqt] 37 | self.cachingLevel = cachingLevel 38 | self.forceRefreshCache = forceRefreshCache 39 | 40 | def predictOne(self, samples: Signal): 41 | """Calculates the cqt of the given audio using librosa. 42 | 43 | Args: 44 | samples (Signal): The samples of the audio. 45 | grid (list of float): The . 46 | 47 | Returns: 48 | tuple of List[float]: The cqt of the audio. 
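        Example (a hedged sketch; ``samples`` is assumed to be the audio Signal
        produced by the ReadFile estimator):

            cqt_signal, = Cqt(parameterScale="Power dB").predictOne(samples)
            # cqt_signal.values has one row per frame and `binNumber` columns,
            # sampled at samples.sampleRate / hopLength frames per second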
49 | 50 | """ 51 | sr = samples.sampleRate 52 | hop_length = self.parameters["hopLength"].value 53 | n_bins = self.parameters["binNumber"].value 54 | cqt_sr = sr / hop_length 55 | cqt = librosa.cqt(samples.values, sr=sr, hop_length=hop_length, n_bins=n_bins) 56 | linear_cqt = np.abs(cqt) 57 | 58 | if self.parameters["scale"].value == "Amplitude": 59 | result = linear_cqt 60 | elif self.parameters["scale"].value == "Power": 61 | result = linear_cqt**2 62 | elif self.parameters["scale"].value == "MSAF": 63 | result = librosa.amplitude_to_db(linear_cqt**2, ref=np.max) 64 | result += np.min(result) * -1 # Inverting the db scale (don't know if this is correct) 65 | elif self.parameters["scale"].value == "Power dB": 66 | result = librosa.amplitude_to_db(linear_cqt, ref=np.max) # Based on Librosa, standard power spectrum in dB 67 | result += np.min(result) * -1 68 | elif self.parameters["scale"].value == "Perceived dB": 69 | freqs = librosa.cqt_frequencies(linear_cqt.shape[0], fmin=librosa.note_to_hz('C1')) 70 | result = librosa.perceptual_weighting(linear_cqt**2, freqs, ref=np.max) 71 | result += np.min(result) * -1 72 | else: 73 | raise ValueError("parameterScale is not a correct value") 74 | 75 | return (Signal(result.T, sampleRate=cqt_sr), ) 76 | -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/normalize.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import List 3 | 4 | import librosa 5 | import numpy as np 6 | 7 | from automix.featureExtraction.estimator import Estimator 8 | from automix.model.classes.signal import Signal 9 | 10 | 11 | class Normalize(Estimator): 12 | """ 13 | Estimator computing the hpss of given audio. 14 | """ 15 | 16 | def __init__(self, inputSamples="barMSE", outputNormalizedSamples="normalizedBarMSE", cachingLevel=2, 17 | forceRefreshCache=False): 18 | super().__init__() 19 | self.inputs = [inputSamples] 20 | self.outputs = [outputNormalizedSamples] 21 | self.cachingLevel = cachingLevel 22 | self.forceRefreshCache = forceRefreshCache 23 | 24 | def predictOne(self, samples: Signal) -> Signal: 25 | """ 26 | Normalize the signal's values 27 | """ 28 | max = np.max(samples.values) 29 | min = np.min(samples.values) 30 | result = copy.copy(samples) 31 | 32 | result.values = ((samples.values - min) / (max - min)).tolist() 33 | return (result, ) 34 | -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/onsetDetection.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import librosa 4 | import numpy as np 5 | 6 | from automix.featureExtraction.estimator import Estimator, Parameter 7 | from automix.model.classes.signal import Signal 8 | 9 | 10 | class OnsetDetection(Estimator): 11 | """ 12 | Estimator calculating the cqt of given audio. 
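    Example (a hedged sketch; ``samples`` is assumed to be an audio Signal from
    the ReadFile estimator). The output is a sparse Signal whose times are the
    onset positions, in seconds, found by librosa.onset.onset_detect:

        onsets, = OnsetDetection().predictOne(samples)
        print(onsets.times)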
13 | """ 14 | 15 | def __init__(self, 16 | inputSamples="samples", 17 | outputOnsetDetection="onsetDetection", 18 | parameterHopLength=512, 19 | parameterBacktrack="True", 20 | cachingLevel=2, 21 | forceRefreshCache=False): 22 | self.parameters = {"hopLength": Parameter(parameterHopLength), "backtrack": Parameter(parameterBacktrack)} 23 | self.inputs = [inputSamples] 24 | self.outputs = [outputOnsetDetection] 25 | self.cachingLevel = cachingLevel 26 | self.forceRefreshCache = forceRefreshCache 27 | 28 | def predictOne(self, samples: Signal): 29 | """TODO 30 | """ 31 | hopLength = self.parameters["hopLength"].value 32 | onsets = librosa.onset.onset_detect(y=samples.values, sr=samples.sampleRate, hop_length=hopLength, backtrack=self.parameters["backtrack"].value) 33 | result = Signal(samples[onsets], times=[samples.getTime(onset * hopLength) for onset in onsets], sparse=True) 34 | return (result, ) 35 | -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/pcp.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import librosa 4 | import numpy as np 5 | 6 | from automix.featureExtraction.estimator import Estimator, Parameter 7 | from automix.model.classes.signal import Signal 8 | 9 | 10 | class Pcp(Estimator): 11 | """ 12 | Estimator calculating the cqt of given audio. 13 | 14 | parameter Scale possible values: 15 | Amplitude 16 | Power 17 | MSAF 18 | Power dB 19 | Perceived dB 20 | """ 21 | 22 | def __init__( 23 | self, 24 | parameterHopLength=512, 25 | parameterNieto=False, # Use nieto's implementation 26 | # parameterScale="Power", 27 | inputSamples="samples", 28 | outputPcp="pcp", 29 | cachingLevel=2, 30 | forceRefreshCache=False): 31 | self.parameters = {"hopLength": Parameter(parameterHopLength), "nieto": Parameter(parameterNieto)} 32 | self.inputs = [inputSamples] 33 | self.outputs = [outputPcp] 34 | self.cachingLevel = cachingLevel 35 | self.forceRefreshCache = forceRefreshCache 36 | 37 | def predictOne(self, samples: Signal): 38 | """Calculates the pcp of the given audio using linrosa.feature.chroma_stft 39 | 40 | Args: 41 | samples (Signal): The samples of the audio. 42 | 43 | Returns: 44 | tuple (1,Signal(samples, bins)): The pcp of the audio. 45 | """ 46 | if self.parameters["nieto"].value: 47 | return self.nietoPCP(samples) 48 | else: 49 | return self.chromagram(samples) 50 | 51 | def nietoPCP(self, samples: Signal): 52 | sr = samples.sampleRate 53 | hop_length = self.parameters["hopLength"].value 54 | pcp_sr = sr / hop_length 55 | 56 | audio_harmonic, _ = librosa.effects.hpss(samples.values) 57 | # I double checked, and the parameters are the one used in MSAF. 
7 octave in pcp_cqt and 6 octaves in pcp 58 | pcp_cqt = np.abs(librosa.hybrid_cqt(audio_harmonic, sr=sr, hop_length=hop_length, n_bins=7 * 12, norm=np.inf, 59 | fmin=27.5))**2 60 | pcp = librosa.feature.chroma_cqt(C=pcp_cqt, sr=sr, hop_length=hop_length, n_octaves=6, fmin=27.5).T 61 | 62 | return (Signal(pcp, sampleRate=pcp_sr), ) 63 | 64 | def chromagram(self, samples: Signal): 65 | sr = samples.sampleRate 66 | result = librosa.feature.chroma_stft(y=samples.values, sr=sr) 67 | hop_length = self.parameters["hopLength"].value 68 | pcp_sr = sr / hop_length 69 | 70 | return (Signal(result.T, sampleRate=pcp_sr), ) 71 | -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/peakPicking.py: -------------------------------------------------------------------------------- 1 | """ 2 | Get the peaks exceding a threshold. Returns only the maximum value in the threshold exceed 3 | """ 4 | 5 | from typing import List 6 | 7 | import numpy as np 8 | from scipy.ndimage import filters 9 | 10 | from automix.featureExtraction.estimator import Estimator, Parameter 11 | from automix.model.classes.signal import Signal 12 | 13 | 14 | class PeakPicking(Estimator): 15 | def __init__(self, 16 | parameterMedianSize=16, 17 | parameterRelativeThreshold=0.5, 18 | parameterThresholdIndex=1, 19 | parameterMinDistance=0, 20 | inputSignal="barMSE", 21 | outputPeaks="peaks", 22 | cachingLevel=2, 23 | forceRefreshCache=False): 24 | """ 25 | Estimator computing returning peaks from a signal. 26 | Based on the method from MAXIMUM FILTER VIBRATO SUPPRESSION FOR ONSET DETECTION 2013 27 | 28 | Parameters 29 | ---------- 30 | parameterRelativeThreshold (optional float): 31 | return the maximum value of a continuous window exceeding the threshold in % of the max value 32 | 33 | parameterThresholdIndex 34 | Limit the search of the maximum value in this part of the signal (1 = 100% of the track, 0.5 = 50% from the start of 35 | the track) 36 | 37 | parameterMinDistance 38 | return the highest peaks in a window of this size, 39 | This value filter peaks within distance striclty inferior. 
40 | (If min distance is set to 8 ticks, two peaks 8 ticks appart can be return) 41 | 42 | parameterMedianSize 43 | When computing peaks without a static threshold 44 | 45 | 46 | windowSize (optional int): 47 | 48 | 49 | distance (optional int) 50 | min distance in indexes between two peaks 51 | """ 52 | self.parameters = { 53 | "medianSize": Parameter(parameterMedianSize), 54 | "relativeThreshold": Parameter(parameterRelativeThreshold), 55 | "thresholdIndex": Parameter(parameterThresholdIndex), 56 | "minDistance": Parameter(parameterMinDistance) 57 | } 58 | self.inputs = [inputSignal] 59 | self.outputs = [outputPeaks] 60 | self.cachingLevel = cachingLevel 61 | self.forceRefreshCache = forceRefreshCache 62 | 63 | def predictOne(self, values: Signal): 64 | listV = np.array(values.values) 65 | if self.parameters["relativeThreshold"].value: 66 | # compute the thrshold at x times the maximum value 67 | threshold = np.max( 68 | listV[:int(len(listV) * self.parameters["thresholdIndex"].value)]) * self.parameters["relativeThreshold"].value 69 | peaks, peaksValues = self.staticThreshold(listV, threshold, self.parameters["minDistance"].value) 70 | else: 71 | peaks, peaksValues = self.adaptiveThreshold(listV, L=self.parameters["medianSize"].value) 72 | 73 | result = Signal(peaksValues, times=[values.times[peak] for peak in peaks], sparse=True) 74 | return (result, ) 75 | 76 | def staticThreshold(self, values: List, threshold: float, minDistance: int): 77 | """ 78 | get the peaks in the list of values. return a list of idx and a list of values 79 | 80 | pk = PeakPicking(minDistance=2, threshold=5) 81 | print(pk.predictOne([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])) -> ([9], [10]) 82 | print(pk.predictOne([10, 2, 3, 4, 5, 6, 7, 8, 9, 10])) -> ([0,9], [10,10]) 83 | 84 | Parameters: 85 | signal (Signal): Signal to extract peak from 86 | 87 | Returns: 88 | boundaries (List): The index of the peaks 89 | 90 | values (List): The value of the peaks 91 | """ 92 | peaksValue = [] 93 | peaksPosition = [] 94 | mySortedList = sorted([(i, value) for i, value in enumerate(values)], key=lambda x: x[1], reverse=True) 95 | for i, value in mySortedList: 96 | # Check if the value is > threshold 97 | if value >= threshold: 98 | # Check if it's the maximum in a window size 99 | # TODO: simplify the implementation like meanThreshold 100 | isMaximum = value == np.max(values[max(i - minDistance, 0):i + minDistance + 1]) 101 | if isMaximum: 102 | peaksValue.append(value) 103 | peaksPosition.append(i) 104 | else: 105 | break 106 | 107 | return peaksPosition, peaksValue 108 | 109 | def meanThreshold(self, values: List, threshold: float, minDistance: int): 110 | """ 111 | get the peaks in the list of values. return a list of idx and a list of values 112 | 113 | pk = PeakPicking(minDistance=2, threshold=5) 114 | print(pk.predictOne([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])) -> ([9], [10]) 115 | print(pk.predictOne([10, 2, 3, 4, 5, 6, 7, 8, 9, 10])) -> ([0,9], [10,10]) 116 | 117 | Parameters: 118 | signal (Signal): Signal to extract peak from 119 | 120 | Returns: 121 | boundaries (List): The index of the peaks 122 | 123 | values (List): The value of the peaks 124 | """ 125 | peaksValue = [] 126 | peaksPosition = [] 127 | lookdistance = 1 128 | mySortedList = sorted([(i, value) for i, value in enumerate(values)], key=lambda x: x[1], reverse=True) 129 | for i, value in mySortedList: #For all the values by decreasing order TODO: implement a different distance to the peak ? 
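            # A candidate peak is kept only if it is the local maximum within +/- minDistance
            # samples and exceeds the local mean by at least `threshold` (isMaximum and isAboveMean below)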
130 | if value >= threshold: 131 | isMaximum = value == np.max(values[max(i - minDistance, 0):i + minDistance + 1]) 132 | isAboveMean = value >= np.mean(values[max(i - minDistance, 0):i + minDistance + 1]) + threshold 133 | if isMaximum and isAboveMean: 134 | peaksValue.append(value) 135 | peaksPosition.append(i) 136 | else: 137 | break 138 | 139 | return peaksPosition, peaksValue 140 | 141 | def adaptiveThreshold(self, nc: List, L=16): 142 | """ 143 | Obtain peaks from a novelty curve using an adaptive threshold. 144 | Foote 2000's method, implementation by msaf 145 | """ 146 | offset = nc.mean() / 20. 147 | 148 | smooth_nc = filters.gaussian_filter1d(nc, sigma=4) # Smooth out nc 149 | 150 | th = filters.median_filter(smooth_nc, size=L) + offset 151 | # th = filters.gaussian_filter(nc, sigma=L/2., mode="nearest") + offset 152 | 153 | peaks = [] 154 | for i in range(1, smooth_nc.shape[0] - 1): 155 | # is it a peak? 156 | if smooth_nc[i - 1] < smooth_nc[i] and smooth_nc[i] > smooth_nc[i + 1]: 157 | # is it above the threshold? 158 | if smooth_nc[i] > th[i]: 159 | peaks.append(i) 160 | 161 | # plt.plot(old) 162 | # plt.plot(nc) 163 | # plt.plot(th) 164 | # for peak in peaks: 165 | # plt.axvline(peak) 166 | # plt.show() 167 | 168 | return peaks, [nc[peak] for peak in peaks] 169 | 170 | 171 | 172 | # TODO: Implement the aggregation of the peaks from multiple features to multiply. 173 | # aggregation: 174 | # independant: tells if the selected peaks are from all the features independently 175 | # multiply: The peaks values are multiplied together and works only when the features agree with each others 176 | 177 | # newCurve = np.ones(len(track.features[features[0]].values)) 178 | # for feature in features: 179 | # newCurve = np.multiply(newCurve, track.features[feature].values) 180 | # peakSignals = pp.predictOne(Signal(newCurve, times=track.features[features[0]].times)) 181 | # newCues = peakSignals[0].times 182 | -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/peakSelection.py: -------------------------------------------------------------------------------- 1 | """ 2 | From multiple list of peaks, returns the k-first peaks timewise. 3 | 4 | Add functionnalities to 5 | """ 6 | 7 | import collections 8 | import logging as log 9 | from typing import List 10 | 11 | import numpy as np 12 | from scipy.ndimage import filters 13 | 14 | from automix.featureExtraction.estimator import Estimator, Parameter 15 | from automix.model.classes.signal import Signal 16 | 17 | 18 | class PeakSelection(Estimator): 19 | """ 20 | Merge from a list of signals all the values in one signal. 21 | 22 | * parameterClusterDistance: merge the values close to each other in the same value 23 | * parameterRelativeDistance: Discard the values with a position exceeding % pf the duration of the track 24 | * parameterSalienceTreshold: Discard the position where the segment in the grid following it has a value below this threshold in the inputSalience 25 | * parameterSalienceWindow: The size of the grid window to compute the salience 26 | * parameterMergeFunction: How the value of the merged peaks is computed 27 | * parameterAbsoluteTop: Absolute number of peaks to select. Set to None to keep all the peaks 28 | """ 29 | 30 | def __init__( 31 | self, 32 | parameterAbsoluteTop=None, # Absolute number of peaks to select. 
Set to None to kepp all the peaks 33 | parameterClusterDistance=0, # The distance in seconds to cluster multiple features occuring at the same time. 34 | parameterMergeFunction=np.sum, # How the clustered peaks' values are merged 35 | parameterRelativeDistance=1, # Return peaks only within in the beginning % of the track 36 | parameterSalienceTreshold=0, # Return peaks preceding a segment having at least this quantity in the Salience feature 37 | parameterSalienceWindow=8, # the size of the window suceeding the peak to compute the salience 38 | inputPeaks=["cqtAmplitudeCheckerboardPeaks"], # The peaks filtered 39 | inputGrid="strongBeats", # The grid to compute the salience window and the duration of the track 40 | inputSalience=["cqtAmplitudeRMSE"], # The list of features used for the salience 41 | outputPeaks="selectedPeaks", # Name of the output 42 | outputNonSalient="nonSalientPeaks", 43 | cachingLevel=0, 44 | forceRefreshCache=True): #As long as there is no way of updating the cache when the input changes 45 | """ 46 | Estimator selecting peaks from multiple list of peaks. 47 | """ 48 | self.parameters = { 49 | "absoluteTop": Parameter(parameterAbsoluteTop), 50 | "clusterDistance": Parameter(parameterClusterDistance), 51 | "relativeDistance": Parameter(parameterRelativeDistance), 52 | "salienceTreshold": Parameter(parameterSalienceTreshold), 53 | "salienceWindow": Parameter(parameterSalienceWindow), 54 | "mergeFunction": Parameter(parameterMergeFunction) 55 | } 56 | self.inputs = [inputPeaks, inputGrid, inputSalience] 57 | self.outputs = [outputPeaks, outputNonSalient] 58 | self.cachingLevel = cachingLevel 59 | self.forceRefreshCache = forceRefreshCache 60 | 61 | def predictOne(self, peakSignals: List[Signal], grid: Signal, salienceSignals: List[Signal]): 62 | # Cluster the peaks to remove close outliers 63 | peaks = Signal.clusterSignals(peakSignals, 64 | minDistance=self.parameters["clusterDistance"].value, 65 | mergeValue=self.parameters["mergeFunction"].value) 66 | 67 | # Get the Salience of the following segment 68 | peaks, nonSalientPeaks = self.getSalientPoints(salienceSignals, grid, peaks) 69 | 70 | # Filter the peaks too far away from the start of the track 71 | peaks = self.getEarlyPeaks(peaks, grid) 72 | 73 | # Get the first absolute k-beats TODO: Set the selection to an "or" ? 
-> I don't like it so much, because we can't 74 | # disable the position filtering with an or 75 | peaks = Signal(peaks.values[:self.parameters["absoluteTop"].value], 76 | times=peaks.times[:self.parameters["absoluteTop"].value], 77 | sparse=True) 78 | 79 | return (peaks, nonSalientPeaks) 80 | 81 | def getEarlyPeaks(self, peaks, grid): 82 | """ 83 | Filter the peaks by relative distance from the start 84 | """ 85 | if self.parameters["relativeDistance"].value < 1: 86 | earlyPeaks = [ 87 | i for i, pos in enumerate(peaks.times) if pos <= grid.duration * self.parameters["relativeDistance"].value 88 | ] 89 | # if len(earlyPeaks) == 0: 90 | # earlyPeaks = [peaks[0]] 91 | peaks = Signal([peaks.values[i] for i in earlyPeaks], times=[peaks.times[i] for i in earlyPeaks]) 92 | return peaks 93 | 94 | def getSalientPoints(self, salienceSignals, grid, peaks): 95 | """ 96 | split peaks signal into two: 97 | Salient points, and non-salient points 98 | """ 99 | if self.parameters["salienceTreshold"].value: 100 | salience = [ 101 | self.getSalience(pos, salienceSignals, grid, self.parameters["salienceWindow"].value) for pos in peaks.times 102 | ] 103 | salientPoints = [i for i, v in enumerate(salience) if v >= self.parameters["salienceTreshold"].value] 104 | nonSalientPoints = [i for i, v in enumerate(salience) if v < self.parameters["salienceTreshold"].value] 105 | 106 | # if there is no point above the threshold of salience, just return the most salient one 107 | if len(salientPoints) == 0 and len(salience) > 0: 108 | salientPoints = [np.argmax(salience)] 109 | nonSalientPoints = [p for p in nonSalientPoints if p not in salientPoints] 110 | 111 | nonSalient = Signal([peaks.values[i] for i in nonSalientPoints], 112 | times=[peaks.times[i] for i in nonSalientPoints], 113 | sparse=True) 114 | peaks = Signal([peaks.values[i] for i in salientPoints], times=[peaks.times[i] for i in salientPoints]) 115 | return peaks, nonSalient 116 | else: 117 | return peaks, Signal([], times=[]) 118 | 119 | def getSalience(self, point, features: List[Signal], grid: Signal, window): 120 | """ 121 | Return a salience of the window following the point 122 | """ 123 | score = 0 124 | for feature in features: 125 | try: 126 | amount = feature.getValues(point, grid.getTime(grid.getIndex(point) + window)) 127 | except IndexError as e: 128 | amount = [0] #TODO sometimes the posiiton is beyond the grid ? 129 | 130 | score += np.mean(amount) if len(amount) else 0 131 | return score / len(features) if len(features) != 0 else 0 132 | -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/periodicity.py: -------------------------------------------------------------------------------- 1 | """ 2 | container for the Periodicity estimator 3 | """ 4 | from typing import List 5 | 6 | import librosa 7 | import numpy as np 8 | 9 | from automix.featureExtraction.estimator import Estimator, Parameter 10 | from automix.model.classes.signal import Signal 11 | 12 | 13 | class Periodicity(Estimator): 14 | """ 15 | Estimator infering the periodicity of a track 16 | 17 | inputFeatures: List of signals 18 | The signals can be sparse or dense. The amplitude and the number of values on the period are taken into account. 19 | 20 | outputPeriod: A sparse signal with a value at each point on the period. 
21 | 22 | paraeterDistanceMetric: compute how the values for the peaks is determined 23 | RMS, SUM, Veire=SUM+Mult 24 | 25 | parameterFeatureAggregation: how the feature are aggregated 26 | qualitatively = by counting the number of features in agreement 27 | Quantitatively = by summing the score of each feature 28 | """ 29 | 30 | def __init__(self, 31 | inputFeatures=["cqtAmplitudeCheckerboard"], 32 | inputGrid="strongBeats", 33 | outputPeriod="period", 34 | parameterDistanceMetric="RMS", 35 | parameterFeatureAggregation="quantitative", 36 | parameterPeriod=2, 37 | cachingLevel=2, 38 | forceRefreshCache=True): #As long as there is no way of updating the cache when the input changes 39 | self.inputs = [inputFeatures, inputGrid] 40 | self.outputs = [outputPeriod] 41 | self.parameters = {"period": Parameter(parameterPeriod), "distanceMetric": Parameter(parameterDistanceMetric), "featureAggregation": Parameter(parameterFeatureAggregation)} 42 | self.cachingLevel = cachingLevel 43 | self.forceRefreshCache = forceRefreshCache 44 | 45 | def predictOne(self, inputFeatures: List[Signal], inputGrid: Signal): 46 | # for period in self.parameters["period"].value: 47 | period = self.parameters["period"].value 48 | phase = self.getPhase(period, inputFeatures, inputGrid) 49 | 50 | return (Signal(inputGrid.values[phase::period], times=inputGrid.times[phase::period], sparse=True), ) 51 | 52 | def getPhase(self, period, features, inputGrid): 53 | """ 54 | Get the phase of the track depending on all the features specified and the period 55 | TODO: The phase should be computed with all the features combined. not with all the features independent 56 | """ 57 | # Equal weight per feature 58 | if self.parameters["featureAggregation"].value == "quantitative": 59 | phasePerFeature = [] 60 | for feature in features: 61 | bestPhase = np.argmax(self.findPhaseLocal(period, feature, inputGrid)) 62 | phasePerFeature.append(bestPhase) 63 | counts = np.bincount(phasePerFeature) 64 | quantitative = np.argmax(counts) 65 | return quantitative 66 | elif self.parameters["featureAggregation"].value == "qualitative": 67 | # Equal weight maybe ? but qualitative value per phase for each feature 68 | overalScore = np.zeros(period) 69 | for feature in features: 70 | score = self.findPhaseLocal(period, feature, inputGrid) 71 | overalScore = np.add(score, overalScore) 72 | qualitative = np.argmax(overalScore) 73 | return qualitative 74 | else: 75 | raise Exception("bad feature aggregation parameter") 76 | 77 | # different weight per feature 78 | # binValues = [] 79 | # for phase in range(period): 80 | # binValues.append([]) 81 | # for feature in features: 82 | # binValues[phase] = [feature.getValue(inputGrid.times[i]) for i in range(phase, len(inputGrid), period)] 83 | # binValues[phase] = [v for v in binValues[phase] if v is not None] 84 | # # Veire's method. 
the best candidate is maximizing the number of peaks in phase AND the amplitude of the peaks 85 | # binProduct = [np.sum(values) * len(values) for values in binValues] 86 | # return np.argmax(binProduct) 87 | 88 | def findPhaseLocal(self, period: int, signal: Signal, grid: Signal, toleranceWindow=0.1): 89 | """ 90 | Find the phase of the signal based on its amplitude at the grid positions and the number of peaks 91 | - signal: works best with a discrete signal as no agglomeration is done 92 | - grid: positions of the beats 93 | - period: the periodicity to test 94 | - tolerance window: if not at 0, returns the closest value in the signal to the grid, within the tolerance window 95 | 96 | test: 97 | # result = findPhase(Signal(np.ones(5), times=np.array([0, 4, 8, 9, 12])+1), Signal(np.ones(16), times=range(16)), 98 | period=4) 99 | # print(result) = 1 100 | """ 101 | phases = [] 102 | for phase in range(period): 103 | values = [signal.getValue(grid.times[i], toleranceWindow=toleranceWindow) for i in range(phase, len(grid), period)] 104 | values = [v for v in values if v is not None] 105 | if self.parameters["distanceMetric"].value == "RMS": 106 | value = np.sqrt(np.mean(np.array(values)**2)) 107 | elif self.parameters["distanceMetric"].value == "sum": 108 | value = np.sum(values) 109 | elif self.parameters["distanceMetric"].value == "Veire": 110 | value = np.sum(values) * len(values) 111 | else: 112 | raise Exception("Bad distance metric parameter: " + self.parameters["distanceMetric"].value) 113 | phases.append(value) 114 | 115 | # bestPhase = np.argmax(phases) 116 | return phases 117 | 118 | 119 | # p = Periodicity(parameterPeriod=4) 120 | # print(p.predictOne([Signal(1, times=[5, 9, 14]), Signal(1, times=[6, 10])], Signal(1, times=range(30)))[0].times) 121 | # print(p.predictOne([Signal(1, times=[5, 9, 6, 10, 14])], Signal(1, times=range(30)))[0].times) 122 | -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/quantize.py: -------------------------------------------------------------------------------- 1 | """ 2 | container for the Quantize estimator 3 | """ 4 | import copy 5 | from typing import List 6 | 7 | from automix.featureExtraction.estimator import Estimator, Parameter 8 | from automix.model.classes.signal import Signal 9 | 10 | 11 | class Quantize(Estimator): 12 | """ 13 | Estimator quantizing the times of a signal to a grid 14 | 15 | Parameters: 16 | - maxThreshold specifies the maximum distance between the value and the closest tick in the grid.
17 | by default at -1, it'll take half of the most common difference in the grid 18 | at 0, all the values not exactly on the grid are out of bound 19 | 20 | All the values further away than the threshold from a grid tick are removed 21 | """ 22 | 23 | def __init__(self, 24 | inputSignal="cqtAmplitudeCheckerboardPeaks", 25 | inputGrid="period", 26 | outputSignal="cqtAmplitudeCheckerboardQuantized", 27 | parameterMaxThreshold=-1, 28 | cachingLevel=2, 29 | forceRefreshCache=False): 30 | self.inputs = [inputSignal, inputGrid] 31 | self.outputs = [outputSignal] 32 | self.parameters = {"maxThreshold": Parameter(parameterMaxThreshold)} 33 | self.cachingLevel = cachingLevel 34 | self.forceRefreshCache = forceRefreshCache 35 | 36 | def predictOne(self, inputSignal: Signal, inputGrid: Signal): 37 | output = copy.deepcopy(inputSignal) 38 | output.quantizeTo(inputGrid, maxThreshold=self.parameters["maxThreshold"].value) 39 | return (output, ) 40 | -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/readFile.py: -------------------------------------------------------------------------------- 1 | """ 2 | container for the ReadFile estimator 3 | """ 4 | import librosa 5 | 6 | from automix.featureExtraction.estimator import Estimator, Parameter 7 | from automix.model.classes.signal import Signal 8 | 9 | 10 | class ReadFile(Estimator): 11 | """ 12 | estimator reading a file from a path 13 | TODO: the output here should not be serialized ! 14 | """ 15 | 16 | def __init__(self, 17 | inputPath="path", 18 | outputSamples="samples", 19 | parameterSampleRate=None, 20 | cachingLevel=2, 21 | forceRefreshCache=False): 22 | self.inputs = [inputPath] 23 | self.outputs = [outputSamples] 24 | self.parameters = {"sampleRate": Parameter(parameterSampleRate)} 25 | self.cachingLevel = cachingLevel 26 | self.forceRefreshCache = forceRefreshCache 27 | 28 | def predictOne(self, path): 29 | y, sr = librosa.load(path, sr=self.parameters["sampleRate"].value) 30 | return (Signal(y, sampleRate=sr), ) 31 | 32 | 33 | class GetDuration(Estimator): 34 | """ 35 | Work around to serialize the exact duration of the tracks 36 | """ 37 | def __init__(self, inputs=["samples"], outputs=["duration"]): 38 | super().__init__(inputs=inputs, outputs=outputs) 39 | 40 | def predictOne(self, samples): 41 | return (samples.duration,) -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/replayGain.py: -------------------------------------------------------------------------------- 1 | """ 2 | container for the ReplayGain estimator 3 | """ 4 | from essentia import standard 5 | 6 | from automix.featureExtraction.estimator import Estimator 7 | from automix.model.classes.signal import Signal 8 | 9 | 10 | # TODO replace by loudness? 
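# Rough usage sketch (the `samples` array and `beats` grid below are hypothetical; essentia's
# ReplayGain algorithm returns a gain value in dB):
#   rg = ReplayGain()
#   trackGain, = rg.predictOne(Signal(samples, sampleRate=44100), None)    # one value for the whole track
#   beatGain, = rg.predictOne(Signal(samples, sampleRate=44100), beats)    # one value per beat interval when a grid is given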
11 | class ReplayGain(Estimator): 12 | """ 13 | Estimator computing the replayGain from the samples: 14 | I think 14dB of headroom 15 | """ 16 | 17 | def __init__(self, inputSamples="samples", inputGrid="beats", output="replayGain", cachingLevel=0, forceRefreshCache=False): 18 | self.inputs = [inputSamples, inputGrid] 19 | self.outputs = [output] 20 | self.cachingLevel = cachingLevel 21 | self.forceRefreshCache = forceRefreshCache 22 | self.parameters = {} 23 | 24 | def predictOne(self, samples: Signal, grid: Signal): 25 | 26 | if grid is not None: 27 | values = [ 28 | standard.ReplayGain(sampleRate=samples.sampleRate)(samples.getValues(grid.times[i], grid.times[i + 1])) 29 | for i in range(len(grid.times) - 1) 30 | ] 31 | return (Signal(values, times=grid.times[:-1]), ) 32 | else: 33 | values = standard.ReplayGain(sampleRate=samples.sampleRate)(samples.values) 34 | return (Signal(values, times=[0]), ) 35 | 36 | # except RuntimeError 37 | # return ReplayGain(sampleRate=44100)( 38 | # self.readAudioFile(path, sr=44100)) 39 | # see return (ld.loudnessSignal(samples, sampleRate), ) also -------------------------------------------------------------------------------- /automix/featureExtraction/lowLevel/windowing.py: -------------------------------------------------------------------------------- 1 | """ 2 | container for the bandSpectrogram estimator 3 | """ 4 | import copy 5 | from typing import List 6 | 7 | import librosa 8 | import numpy as np 9 | from essentia import standard 10 | 11 | from automix.featureExtraction.estimator import Estimator, Parameter 12 | from automix.model.classes.signal import Signal 13 | from automix.utils import normalize 14 | 15 | # TODO Is it windowing if we are computing only one value per window ? Change the name 16 | class Windowing(Estimator): 17 | def __init__(self, 18 | parameterWindow="rectangular", 19 | parameterAggregation="rmse", 20 | parameterSteps=1, 21 | parameterPanning=0, 22 | inputSamples="samples", 23 | inputGrid="downbeats", 24 | output="RMSE", 25 | cachingLevel=0, 26 | forceRefreshCache=False): 27 | """ 28 | Create a window of the input signal at each grid tick 29 | 30 | parameterWindow: the name of the window function to apply: 31 | - rectangular: Only window currently implemented 32 | 33 | parameterAggregation: What to do to the values in the window 34 | - None 35 | - rmse 36 | - sum 37 | 38 | parameterPanning: How much do you shift, in ratio of the median distance between the grid ticks, the windows boundaries. 39 | Use a neagtive value (ie -0.25) to shift the windows 1/4 of the grid to the left. 40 | 41 | parameterSteps: TODO 42 | """ 43 | # parameterLength: TODO implement 44 | # parameterBands=[[20, 250], [250, 3000], [3000, 22000]], 45 | self.parameters = { 46 | "window": Parameter(parameterWindow), 47 | "aggregation": Parameter(parameterAggregation), 48 | "steps": Parameter(parameterSteps), 49 | "panning": Parameter(parameterPanning) 50 | } 51 | self.inputs = [inputSamples, inputGrid] 52 | self.outputs = [output] 53 | self.cachingLevel = cachingLevel 54 | self.forceRefreshCache = forceRefreshCache 55 | 56 | def predictOne(self, signal: Signal, grid: Signal): 57 | """ 58 | Returns the amplitude or RMSE of the signal between grid ticks 59 | """ 60 | # get the bands. Should I use librosa.fft_frequencies() instead ? 
61 | # if self.parameters["bands"].value: 62 | # y = np.array(samples.values) 63 | # sr = samples.sampleRate 64 | # n_fft = 2048 # default value 65 | # srSTFT = sr / (n_fft / 4) # Sample rate of the STFT 66 | # d = librosa.stft(y, n_fft=n_fft) 67 | # # Perception correction 68 | # perceivedD = librosa.perceptual_weighting(np.abs(d)**2, librosa.ffharmonicRMSEt_frequencies(sr=sr, n_fft=n_fft)) 69 | # # perceivedD = np.abs(d) 70 | 71 | # dBands = [[ 72 | # np.mean(frame[int(band[0] * len(frame) / (sr / 2)):int(band[1] * len(frame) / (sr / 2))]) 73 | # for frame in np.ndarray.transpose(perceivedD) 74 | # ] for band in self.parameters["bands"].value] 75 | 76 | # # normalize based on the highest and lowest values accross all frequencies. 77 | # # then, because of the perception correction, and the replayGain normalization, 78 | # # the highest peak (amplitude of one) should be the same accross all frequencies 79 | # dBands = normalize(dBands) 80 | 81 | # rparameterStepsT, grid, square=False) for band in dBands], ) # barBandMSE 82 | 83 | # else:parameterSteps 84 | if self.parameters["steps"].value > 1: 85 | grid = self._subdivide(grid, self.parameters["steps"].value) 86 | return (self._getWindows(signal, 87 | grid, 88 | window=self.parameters["window"].value, 89 | aggregation=self.parameters["aggregation"].value), ) 90 | 91 | def _subdivide(self, grid, steps): 92 | newTimes = [] 93 | for i in range(len(grid.times) - 1): 94 | newTimes = np.concatenate( 95 | (newTimes, np.arange(grid.times[i], grid.times[i + 1], (grid.times[i + 1] - grid.times[i]) / steps))) 96 | 97 | newTimes = np.concatenate((newTimes, [grid.times[-1]])) # TODO: clean that 98 | 99 | return Signal(np.ones(len(newTimes)), times=newTimes) 100 | 101 | def _getWindows(self, 102 | signal: Signal, 103 | grid: Signal, 104 | addAnacrusis=False, 105 | addAfterLastBeat=False, 106 | window="square", 107 | aggregation='rmse'): 108 | """ 109 | Get the root mean square amplitude between each tick of the grid (in seconds). 110 | addAnacrusis add also the energy from the first sample in the signal to the first tick of the grid, 111 | and the last tick of the grid to the last sample of the signal. 112 | return eg [0.1,0.2,0.1,0.2,0.8,0.9,0.8,0.9] 113 | """ 114 | 115 | result = [] 116 | times = copy.copy(grid.times) 117 | # pan times 118 | panning = self.parameters["panning"].value * np.median(np.diff(times)) 119 | times = [time - panning for time in times] 120 | # if addAnacrusis: 121 | # times = np.insert(times, 0, 0) # TODO make it faster by not creating a new array 122 | # annacrusisValues = signal.getValues(0, times[]) 123 | # if len(annacrusisValues): 124 | # result.append(self._getWindow(annacrusisValues, window, aggregation)) 125 | # else: # If the first tick is at 0, then the anacrusis is 0, or [0 ,..., 0] if the signal is multidimensional 126 | # result.append(signal.values[0] * 0.) 127 | 128 | for i in range(len(grid) - 1): 129 | result.append(self._getWindow(signal.getValues(times[i], times[i + 1]), signal.sampleRate, window, aggregation)) 130 | 131 | # if addAfterLastBeat: 132 | # afterValues = signal.getValues(grid.times[-1], signal.duration) 133 | # if len(afterValues): 134 | # result.append(self._getWindow(afterValues, window, aggregation)) 135 | # else: 136 | # result.append(signal.values[0] * 0.) 
137 | # else: 138 | # times = times[:-1] 139 | 140 | return Signal(result, times=grid.times[:-1]) 141 | 142 | def _getWindow(self, signal, sr, window, aggregation): 143 | """ 144 | do the aggregation of the samples inside the windo 145 | """ 146 | if aggregation == "rmse": 147 | return np.sqrt(np.mean(np.square(signal), axis=0)) 148 | 149 | elif aggregation == "sum": 150 | return np.sum(signal, axis=0) 151 | 152 | elif aggregation == "replayGain": 153 | return standard.ReplayGain(sampleRate=sr)(signal) 154 | 155 | return signal 156 | -------------------------------------------------------------------------------- /automix/featureExtraction/melody/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MZehren/Automix/dfaa00a9e7c5c0938c0a9d275c07f3a3e5f87e43/automix/featureExtraction/melody/__init__.py -------------------------------------------------------------------------------- /automix/featureExtraction/melody/melodiaProxy.py: -------------------------------------------------------------------------------- 1 | import utils 2 | 3 | import vamp 4 | import librosa 5 | import numpy as np 6 | 7 | 8 | def extractMelodie(path, grid, minSamplesRatio=0.5): 9 | data, rate = Track.readFile(path) 10 | 11 | # The pitch of the main melody. Each row of the output contains a timestamp and the corresponding frequency of the melody in Hertz. 12 | # Non-voiced segments are indicated by zero or negative frequency values. 13 | # Negative values represent the algorithm's pitch estimate for segments estimated as non-voiced, 14 | # in case the melody is in fact present there. 15 | result = vamp.collect(data, rate, "mtg-melodia:melodia") 16 | melodieSampleRate = float(len(result['vector'][1])) / ( 17 | float(len(data)) / rate) 18 | 19 | #Beat samples 20 | beatMelodieSamples = [ 21 | result['vector'][1][int(grid[tickI] * melodieSampleRate):int(( 22 | grid[tickI + 1]) * melodieSampleRate)] 23 | for tickI in range(len(grid) - 1) 24 | ] 25 | beatPositiveSamples = [[sample for sample in samples if sample > 0] 26 | for samples in beatMelodieSamples] 27 | # beatNote = [(np.mean(samples)) if len(samples) >= minNumSamples else "-1" 28 | # for samples in beatPositiveSamples] 29 | 30 | beatNote = [ 31 | utils.hertzToNote(np.percentile(samples, [20, 50, 80])[1]) if 32 | len(samples) >= minSamplesRatio * len(beatMelodieSamples[i]) else "-1" 33 | for i, samples in enumerate(beatPositiveSamples) 34 | ] 35 | return beatNote 36 | -------------------------------------------------------------------------------- /automix/featureExtraction/novelty/__init__.py: -------------------------------------------------------------------------------- 1 | # from .derivative import Derivative 2 | from .checkerboard import Checkerboard 3 | # from .structuralFeatures import StructuralFeatures 4 | # from .olda import OLDA -------------------------------------------------------------------------------- /automix/featureExtraction/novelty/checkerboard.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import List 3 | 4 | import librosa 5 | import numpy as np 6 | from scipy.spatial import distance 7 | from scipy import signal 8 | 9 | from automix.featureExtraction.estimator import Estimator, Parameter 10 | from automix.model.classes.signal import Signal 11 | 12 | 13 | class Checkerboard(Estimator): 14 | """ 15 | Estimator computing the checkerboard novelty from Foote 16 | Implementation from msaf 17 | 18 | 
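    Rough usage sketch (illustrative; `barRMSE` and `barTimes` are hypothetical per-bar features, not names from this module):
        # cb = Checkerboard(parameterWindowSize=8)
        # novelty, = cb.predictOne(Signal(barRMSE, times=barTimes))
        # peaks of novelty.values mark positions where the self-similarity matrix switches between two homogeneous blocks
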
Parameters: 19 | - parameterWindowSize = size of the checkerboard kernel for the convolution in ticks of the input 20 | The kernel is split on the middle, so use twice the wanted size 21 | - parameterDistanceMetric = how to compute the distance between samples to create the ssm. 22 | Veire uses cosine distance for tensor, absolute scalar difference for a scalar 23 | - parameterDebugViz: deprecated 24 | - addZerosStart: pad the start of the file with either 0, or the first sample to be able to apply the convolution right from the start of the file 25 | accepts values [None, True, 0] 26 | """ 27 | 28 | def __init__(self, 29 | parameterWindowSize=16, 30 | parameterDistanceMetric="seuclidean", 31 | parameterDebugViz=False, 32 | parameterAddZerosStart=True, 33 | inputSamples="normalizedBarMSE", 34 | outputNovelty="noveltyMSE", 35 | cachingLevel=2, 36 | forceRefreshCache=False): 37 | self.parameters = { 38 | "windowSize": Parameter(parameterWindowSize), 39 | "distanceMetric": Parameter(parameterDistanceMetric), 40 | "debugViz": Parameter(parameterDebugViz), 41 | "addZerosStart": Parameter(parameterAddZerosStart) 42 | } 43 | self.inputs = [inputSamples] 44 | self.outputs = [outputNovelty] 45 | self.cachingLevel = cachingLevel 46 | self.forceRefreshCache = forceRefreshCache 47 | 48 | def compute_gaussian_krnl(self, M): 49 | """Creates a gaussian kernel following Foote's paper.""" 50 | g = signal.gaussian(M, M // 3., sym=True) 51 | G = np.dot(g.reshape(-1, 1), g.reshape(1, -1)) 52 | G[M // 2:, :M // 2] = -G[M // 2:, :M // 2] 53 | G[:M // 2, M // 2:] = -G[:M // 2, M // 2:] 54 | return G 55 | 56 | def compute_ssm(self, X, metric="seuclidean"): 57 | """Computes the self-similarity matrix of X.""" 58 | D = distance.pdist(X, metric=metric) 59 | D = distance.squareform(D) 60 | D /= D.max() # TODO: Why normalizing here ? 61 | return 1 - D 62 | 63 | def compute_nc(self, X, G): 64 | """Computes the novelty curve from the self-similarity matrix X and 65 | the gaussian kernel G.""" 66 | N = X.shape[0] 67 | M = G.shape[0] 68 | nc = np.zeros(N) 69 | 70 | # Convolution on the diagonal 71 | for i in range(M // 2, N - M // 2 + 1): 72 | nc[i] = np.sum(X[i - M // 2:i + M // 2, i - M // 2:i + M // 2] * G) 73 | 74 | # Normalize 75 | # TODO: Why normalizing here ?? 
76 | nc += nc.min() 77 | nc /= nc.max() 78 | return nc 79 | 80 | def predictOne(self, samples: Signal) -> Signal: 81 | """ 82 | see Foot 2000 83 | """ 84 | # Make the input array multidimensional 85 | f = samples.values 86 | if f.ndim == 1: 87 | f = np.array([f]).T 88 | if self.parameters["addZerosStart"].value is not None and self.parameters["addZerosStart"].value is not False: 89 | f = np.concatenate((np.zeros([self.parameters["windowSize"].value, f.shape[1]]), f), axis=0) 90 | # if self.parameters["addZerosStart"].value == 0: 91 | # f = np.concatenate((np.zeros([self.parameters["windowSize"].value, f.shape[1]]), f), axis=0) 92 | # else: 93 | # f = np.concatenate((np.ones([self.parameters["windowSize"].value, f.shape[1]]) * f[0], f), axis=0) 94 | 95 | # Compute the self-similarity matrix 96 | S = self.compute_ssm(f, metric=self.parameters["distanceMetric"].value) 97 | 98 | # Compute gaussian kernel 99 | G = self.compute_gaussian_krnl(self.parameters["windowSize"].value) 100 | 101 | # Compute the novelty curve 102 | nc = self.compute_nc(S, G) 103 | 104 | result = copy.copy(samples) 105 | result.values = nc 106 | 107 | if self.parameters["addZerosStart"].value is not None and self.parameters["addZerosStart"].value is not False: 108 | result.values = result.values[self.parameters["windowSize"].value:] 109 | 110 | if self.parameters["debugViz"].value: 111 | self.plot(S, nc) 112 | return (result, ) 113 | 114 | def plot(self, S, nc): 115 | 116 | import matplotlib.pyplot as plt 117 | 118 | plt.matshow(S) 119 | plt.plot(nc * (-100) + (len(nc) + 100)) 120 | plt.show() 121 | 122 | 123 | # cb = Checkerboard() 124 | # x = np.array([[1, 1, 1, 1, 0, 0, 0, 0], [1, 1, 1, 1, 0, 0, 0, 0], [1, 1, 1, 1, 0, 0, 0, 0], [1, 1, 1, 1, 0, 0, 0, 0], 125 | # [0, 0, 0, 0, 1, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1]]) 126 | # print("self-similar", cb.compute_nc(x, np.array([[1, 1, 0, 0], [1, 1, 0, 0], [0, 0, 1, 1], [0, 0, 1, 1]]))) 127 | # print("cross-similar", cb.compute_nc(x, np.array([[0, 0, 1, 1], [0, 0, 1, 1], [1, 1, 0, 0], [1, 1, 0, 0]]))) 128 | # print("fll-kernell", cb.compute_nc(x, np.array([[1, 1, -1, -1], [1, 1, -1, -1], [-1, -1, 1, 1], [-1, -1, 1, 1]]))) 129 | -------------------------------------------------------------------------------- /automix/featureExtraction/novelty/derivative.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from scipy import signal 3 | from typing import List 4 | 5 | import findiff 6 | import librosa 7 | import numpy as np 8 | 9 | from automix.featureExtraction.estimator import Estimator, Parameter 10 | from automix.model.classes.signal import Signal 11 | 12 | 13 | class Derivative(Estimator): 14 | """ 15 | Estimator computing the derivative of given discrete signal. 
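    Rough sketch of the expected behaviour (illustrative, not a unit test):
        # d = Derivative(parameterWindowSize=4, parameterAbsoluteDiff=True)
        # novelty, = d.predictOne(Signal([0, 0, 0, 1, 1, 1], times=range(6)))
        # novelty.values is largest around index 3, where the input steps from 0 to 1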
16 | """ 17 | 18 | def __init__(self, 19 | parameterWindowSize=8, 20 | parameterAbsoluteDiff=True, 21 | parameterGaussianCoef=5, 22 | inputSamples="normalizedBarMSE", 23 | outputNovelty="noveltyMSE", 24 | cachingLevel=2, 25 | forceRefreshCache=False): 26 | self.parameters = { 27 | "windowSize": Parameter(parameterWindowSize), 28 | "absoluteDiff": Parameter(parameterAbsoluteDiff), 29 | "gaussianCoef": Parameter(parameterGaussianCoef) 30 | } 31 | self.inputs = [inputSamples] 32 | self.outputs = [outputNovelty] 33 | self.cachingLevel = cachingLevel 34 | self.forceRefreshCache = forceRefreshCache 35 | 36 | def predictOne(self, samples: Signal) -> Signal: 37 | """ 38 | Compute the central finite difference of the form: (-f(x - h)/2 + f(x + h)/2) / h 39 | see: https://en.wikipedia.org/wiki/Finite_difference_coefficient#cite_note-fornberg-1 40 | """ 41 | f = samples.values 42 | W, offsets = self.getCenteredWeights(self.parameters["windowSize"].value, std=self.parameters["gaussianCoef"].value) 43 | 44 | difference = [ 45 | np.sum([f[x + offset] * W[i] for i, offset in enumerate(offsets) if x + offset >= 0 and x + offset < len(f)]) 46 | for x, _ in enumerate(f) 47 | ] 48 | # if len(f.shape) == 2: 49 | # paddedF = np.concatenate((np.zeros((window // 2, f.shape[1])), f, np.zeros((window // 2, f.shape[1])))) 50 | # else: 51 | # paddedF = np.concatenate((np.zeros(window // 2), f, np.zeros(window // 2))) 52 | 53 | # X = range(window // 2, len(paddedF) - window // 2) 54 | # difference = [np.sum([paddedF[x + i - window // 2] * w for i, w in enumerate(W)], axis=0) / window for x in X] 55 | # differenceOld = [(np.sum(f[i:i + window // 2]) - np.sum(f[max(0, i - window // 2):i])) / (window) 56 | # for i, _ in enumerate(f)] 57 | 58 | if self.parameters["absoluteDiff"].value: 59 | difference = np.abs(difference) 60 | 61 | result = copy.copy(samples) 62 | result.values = difference 63 | return (result, ) 64 | 65 | def getCenteredWeights(self, window, coefficients="gaussian", std=5): 66 | """ 67 | returns the coefficient and the indexes 68 | """ 69 | offsets = range(-window // 2, window // 2) 70 | 71 | if coefficients == "findiff": 72 | coefs = findiff.coefficients(1, window)["center"] 73 | return (coefs["coefficients"], coefs["offsets"]) 74 | elif coefficients == "gaussian": 75 | coefs = signal.gaussian(window, std, sym=True) # get the guaussien filters 76 | coefs = coefs * 2 / np.sum(coefs) # scale them to sum to one for each side 77 | coefs = [coef if i >= window // 2 else -coef for i, coef in enumerate(coefs)] # invert the first weights 78 | return coefs, offsets 79 | else: 80 | coefs = [1 / window if i >= window // 2 else -1 / window for i in range(window)] 81 | offsets = [range(-window // 2, window // 2)] 82 | return (coefs, offsets) 83 | 84 | 85 | # import matplotlib.pyplot as plt 86 | 87 | # d = Derivative() 88 | 89 | # w1, _ = d.getCenteredWeights(16, std=4) 90 | # plt.plot(_, w1, label="std 4") 91 | # w1, _ = d.getCenteredWeights(16, std=100) 92 | # plt.plot(_, w1, label="std 100") 93 | # w1, _ = d.getCenteredWeights(16, std=16//3) 94 | # plt.plot(_, w1, label="std 16/3") 95 | 96 | # plt.legend() 97 | # plt.show() -------------------------------------------------------------------------------- /automix/featureExtraction/novelty/olda.py: -------------------------------------------------------------------------------- 1 | from msaf.algorithms.olda import segmenter 2 | 3 | import numpy as np 4 | from automix.featureExtraction.estimator import Estimator, Parameter 5 | from automix.model.classes.signal 
import Signal 6 | 7 | 8 | class OLDA(Estimator): 9 | """ 10 | Estimator computing the olda novelty from McFee 2014 11 | Implementation from msaf 12 | """ 13 | 14 | def __init__(self, inputSamples="normalizedBarMSE", outputNovelty="noveltyMSE", cachingLevel=2, forceRefreshCache=False): 15 | self.parameters = {} 16 | self.inputs = [inputSamples] 17 | self.outputs = [outputNovelty] 18 | self.cachingLevel = cachingLevel 19 | self.forceRefreshCache = forceRefreshCache 20 | 21 | def predictOne(self, samples: Signal) -> Signal: 22 | """ 23 | TODO 24 | """ 25 | W = np.load("/home/mickael/Documents/programming/article-msa-structure/msaf/msaf/algorithms/olda/models/EstBeats_BeatlesTUT.npy") 26 | 27 | F = W.dot(samples.values) 28 | 29 | kmin, kmax = segmenter.get_num_segs(samples.duration) 30 | est_idxs = segmenter.get_segments(F, kmin=kmin, kmax=kmax) 31 | 32 | return (est_idxs, ) 33 | -------------------------------------------------------------------------------- /automix/featureExtraction/novelty/spectralClustering.py: -------------------------------------------------------------------------------- 1 | from msaf.algorithms.olda import segmenter 2 | 3 | import numpy as np 4 | from automix.featureExtraction.estimator import Estimator, Parameter 5 | from automix.model.classes.signal import Signal 6 | 7 | 8 | class OLDA(Estimator): 9 | """ 10 | Estimator computing the olda novelty from McFee 2014 11 | Implementation from msaf 12 | """ 13 | 14 | def __init__(self, inputSamples="normalizedBarMSE", outputNovelty="noveltyMSE", cachingLevel=2, forceRefreshCache=False): 15 | self.parameters = {} 16 | self.inputs = [inputSamples] 17 | self.outputs = [outputNovelty] 18 | self.cachingLevel = cachingLevel 19 | self.forceRefreshCache = forceRefreshCache 20 | 21 | def predictOne(self, PCP: Signal, MFCC: Signal) -> Signal: 22 | """ 23 | This script identifies the boundaries of a given track using the Spectral 24 | Clustering method published here: 25 | 26 | Mcfee, B., & Ellis, D. P. W. (2014). Analyzing Song Structure with Spectral 27 | Clustering. In Proc. of the 15th International Society for Music 28 | Information Retrieval Conference (pp. 405–410). Taipei, Taiwan. 
29 | 30 | Original code by Brian McFee from: 31 | https://github.com/bmcfee/laplacian_segmentation 32 | """ 33 | assert np.array_equal(PCP.times, MFCC.times) 34 | pass 35 | -------------------------------------------------------------------------------- /automix/featureExtraction/novelty/structuralFeatures.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import List 3 | 4 | import librosa 5 | import msaf.utils as U 6 | import numpy as np 7 | from scipy import signal 8 | from scipy.ndimage import filters 9 | from scipy.spatial import distance 10 | 11 | from automix.featureExtraction.estimator import Estimator, Parameter 12 | from automix.featureExtraction.lowLevel import Normalize 13 | from automix.model.classes.signal import Signal 14 | 15 | 16 | class StructuralFeatures(Estimator): 17 | """ 18 | Estimator computing the sf novelty from Serrà 19 | Implementation from msaf 20 | 21 | parameter_m_embedded = the number of samples to put together to make more sense of them -> it's similar to the window size 22 | parameter_M_gaussian = the std deviation of the gaussian filter / 2 23 | """ 24 | 25 | def __init__( 26 | self, 27 | # parameter_Mp_adaptive=28, 28 | # parameter_offset_thres=0.05, 29 | parameter_M_gaussian=16, 30 | parameter_m_embedded=3, 31 | parameter_k_nearest=0.04, 32 | parameter_bound_norm_feats=np.inf, 33 | inputSamples="normalizedBarMSE", 34 | outputNovelty="noveltyMSE", 35 | cachingLevel=2, 36 | forceRefreshCache=False): 37 | self.parameters = { 38 | # "Mp_adaptive": Parameter(parameter_Mp_adaptive), 39 | # "offset_thres": Parameter(parameter_offset_thres), 40 | "M_gaussian": Parameter(parameter_M_gaussian), 41 | "m_embedded": Parameter(parameter_m_embedded), 42 | "k_nearest": Parameter(parameter_k_nearest), 43 | "bound_norm_feats": Parameter(parameter_bound_norm_feats) 44 | } 45 | self.inputs = [inputSamples] 46 | self.outputs = [outputNovelty] 47 | self.cachingLevel = cachingLevel 48 | self.forceRefreshCache = forceRefreshCache 49 | 50 | def predictOne(self, samples: Signal) -> Signal: 51 | """ 52 | TODO 53 | """ 54 | # Structural Features params 55 | # Mp = self.parameters["Mp_adaptive"].value # Size of the adaptive threshold for 56 | # peak picking 57 | # od = self.parameters["offset_thres"].value # Offset coefficient for adaptive 58 | # thresholding 59 | 60 | M = self.parameters["M_gaussian"].value # Size of gaussian kernel in beats 61 | m = self.parameters["m_embedded"].value # Number of embedded dimensions 62 | k = self.parameters["k_nearest"].value # k*N-nearest neighbors for the 63 | # recurrence plot 64 | 65 | # Preprocess to obtain features, times, and input boundary indeces 66 | F = np.array(samples.values) 67 | if F.ndim == 1: 68 | F = np.array([F]).T 69 | 70 | if len(F.shape) == 2: 71 | F = np.concatenate((np.zeros((m // 2, F.shape[1])), F, np.zeros((m // 2, F.shape[1])))) 72 | else: 73 | F = np.concatenate((np.zeros(m // 2), F, np.zeros(m // 2))) 74 | # Normalize 75 | # F_norm = Normalize().predictOne(F) 76 | # F = U.normalize(F, norm_type=self.parameters["bound_norm_feats"].value) 77 | 78 | # Check size in case the track is too short 79 | if F.shape[0] > 20: 80 | 81 | # if self.framesync: # Whether to use frame-synchronous or beat-synchronous features. 82 | # red = 0.1 83 | # F_copy = np.copy(F) 84 | # F = librosa.util.utils.sync(F.T, np.linspace(0, F.shape[0], num=F.shape[0] * red), pad=False).T 85 | 86 | # Emedding the feature space (i.e. 
shingle) 87 | # E[i] = F[i]+F[i+1]+F[i+2] 88 | E = embedded_space(F, m) 89 | # plt.imshow(E.T, interpolation="nearest", aspect="auto"); plt.show() 90 | 91 | # Recurrence matrix 92 | R = librosa.segment.recurrence_matrix( 93 | E.T, 94 | k=k * int(F.shape[0]), 95 | width=1, # zeros from the diagonal 96 | metric="euclidean", 97 | sym=True).astype(np.float32) 98 | 99 | # Circular shift 100 | L = circular_shift(R) 101 | 102 | # Obtain structural features by filtering the lag matrix 103 | SF = gaussian_filter(L.T, M=M, axis=1) 104 | SF = gaussian_filter(L.T, M=1, axis=0) 105 | 106 | # Compute the novelty curve 107 | nc = compute_nc(SF) 108 | nc = nc[m//2:-m//2] 109 | times = samples.times[:-m] 110 | return (Signal(nc, times=times), ) 111 | else: 112 | return (None, ) 113 | 114 | 115 | def median_filter(X, M=8): 116 | """Median filter along the first axis of the feature matrix X.""" 117 | for i in range(X.shape[1]): 118 | X[:, i] = filters.median_filter(X[:, i], size=M) 119 | return X 120 | 121 | 122 | def gaussian_filter(X, M=8, axis=0): 123 | """Gaussian filter along the first axis of the feature matrix X.""" 124 | for i in range(X.shape[axis]): 125 | if axis == 1: 126 | X[:, i] = filters.gaussian_filter(X[:, i], sigma=M / 2.) 127 | elif axis == 0: 128 | X[i, :] = filters.gaussian_filter(X[i, :], sigma=M / 2.) 129 | return X 130 | 131 | 132 | def compute_gaussian_krnl(M): 133 | """Creates a gaussian kernel following Serra's paper.""" 134 | g = signal.gaussian(M, M / 3., sym=True) 135 | G = np.dot(g.reshape(-1, 1), g.reshape(1, -1)) 136 | G[M // 2:, :M // 2] = -G[M // 2:, :M // 2] 137 | G[:M // 2, M // 1:] = -G[:M // 2, M // 1:] 138 | return G 139 | 140 | 141 | def compute_ssm(X, metric="seuclidean"): 142 | """Computes the self-similarity matrix of X.""" 143 | D = distance.pdist(X, metric=metric) 144 | D = distance.squareform(D) 145 | D /= float(D.max()) 146 | return 1 - D 147 | 148 | 149 | def compute_nc(X): 150 | """Computes the novelty curve from the structural features.""" 151 | N = X.shape[0] 152 | # nc = np.sum(np.diff(X, axis=0), axis=1) # Difference between SF's 153 | 154 | nc = np.zeros(N) 155 | for i in range(N - 1): 156 | nc[i] = distance.euclidean(X[i, :], X[i + 1, :]) 157 | 158 | # Normalize 159 | nc += np.abs(nc.min()) 160 | nc /= float(nc.max()) 161 | return nc 162 | 163 | 164 | def pick_peaks(nc, L=16, offset_denom=0.1): 165 | """Obtain peaks from a novelty curve using an adaptive threshold.""" 166 | offset = nc.mean() * float(offset_denom) 167 | th = filters.median_filter(nc, size=L) + offset 168 | # th = filters.gaussian_filter(nc, sigma=L/2., mode="nearest") + offset 169 | # import pylab as plt 170 | # plt.plot(nc) 171 | # plt.plot(th) 172 | # plt.show() 173 | # th = np.ones(nc.shape[0]) * nc.mean() - 0.08 174 | peaks = [] 175 | for i in range(1, nc.shape[0] - 1): 176 | # is it a peak? 177 | if nc[i - 1] < nc[i] and nc[i] > nc[i + 1]: 178 | # is it above the threshold? 
179 | if nc[i] > th[i]: 180 | peaks.append(i) 181 | return peaks 182 | 183 | 184 | def circular_shift(X): 185 | """Shifts circularly the X squre matrix in order to get a 186 | time-lag matrix.""" 187 | N = X.shape[0] 188 | L = np.zeros(X.shape) 189 | for i in range(N): 190 | L[i, :] = np.asarray([X[(i + j) % N, j] for j in range(N)]) 191 | return L 192 | 193 | 194 | def embedded_space(X, m, tau=1): 195 | """Time-delay embedding with m dimensions and tau delays.""" 196 | N = X.shape[0] - int(np.ceil(m)) 197 | Y = np.zeros((N, int(np.ceil(X.shape[1] * m)))) 198 | for i in range(N): 199 | # print X[i:i+m,:].flatten().shape, w, X.shape 200 | # print Y[i,:].shape 201 | rem = int((m % 1) * X.shape[1]) # Reminder for float m 202 | Y[i, :] = np.concatenate((X[i:i + int(m), :].flatten(), X[i + int(m), :rem])) 203 | return Y 204 | -------------------------------------------------------------------------------- /automix/featureExtraction/structure/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MZehren/Automix/dfaa00a9e7c5c0938c0a9d275c07f3a3e5f87e43/automix/featureExtraction/structure/__init__.py -------------------------------------------------------------------------------- /automix/featureExtraction/structure/eval.py: -------------------------------------------------------------------------------- 1 | import logging as log 2 | from typing import List 3 | 4 | import mir_eval 5 | import numpy as np 6 | 7 | from automix.model.classes.signal import Signal, SparseSegmentSignal 8 | 9 | 10 | def evalCuesMutliple( 11 | Y_pred: List[Signal], 12 | Y_truth: List[Signal], 13 | window=0.5, 14 | averagePerDocument=False, # TODO: rename this approach "mean" or "sum" 15 | returnDetails=False, 16 | limitSearchSpace=False, 17 | limitSearchNumber=False): 18 | """ 19 | Get the metrics for an array of signals 20 | window = hit distance 21 | averagePerDocument = Return the document average instead of the sample average ("mean" vs "sum") 22 | returnDetails = return each individual score instead of the average 23 | limitSearchSpace = compute the hitRate only for the part annotated of the track. Stop at the last annotation. 24 | All the points detected after are not taken into account 25 | """ 26 | 27 | result = {} 28 | if limitSearchSpace: # Remove all the prediction after the last annotation 29 | maxY = [max(Y_truth[i].times) if len(Y_truth[i].times) else 0 for i, _ in enumerate(Y_truth)] 30 | Y_pred = [Signal(1, times=[t for t in Y_pred[i].times if t <= maxY[i] + window]) for i, _ in enumerate(Y_pred)] 31 | 32 | if limitSearchNumber: # Remove all the predictions above the number of annotations in each track 33 | nY = [len(Y_truth[i]) for i, _ in enumerate(Y_truth)] 34 | Y_pred = [Signal(1, times=Y_pred[i].times[: nY[i]]) for i, _ in enumerate(Y_pred)] 35 | 36 | if averagePerDocument: # Average per documents: sum ? 37 | tracksResult = [evalCues(Y_pred[i], Y_truth[i], window=window) for i in range(len(Y_truth))] 38 | for field, value in tracksResult[0].items(): 39 | if returnDetails: 40 | result[field] = [measure[field] for measure in tracksResult] 41 | else: 42 | result[field] = np.sum([measure[field] for measure in tracksResult]) / len(tracksResult) 43 | else: # Average per points: mean? 
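        # Pooled ("micro") average: hits and counts are summed over all the tracks first,
        # so tracks with more predictions/annotations weigh more than in the per-document average above.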
44 | # TODO: redundant call to hit 45 | precision = np.sum([hit(Y_pred[i], Y_truth[i], window) 46 | for i, _ in enumerate(Y_truth)]) / np.sum(len(Y_pred[i]) for i, _ in enumerate(Y_truth)) 47 | recall = np.sum([hit(Y_pred[i], Y_truth[i], window) 48 | for i, _ in enumerate(Y_truth)]) / np.sum(len(Y_truth[i]) for i, _ in enumerate(Y_truth)) 49 | fMeasure = 2 * (precision * recall) / (precision + recall) 50 | result = {"precision": precision, "recall": recall, "fMeasure": fMeasure} 51 | log.debug(result) 52 | return result 53 | 54 | 55 | def evalCues(y_: Signal, y: Signal, window=0.5): 56 | """Get the F1, Precision and recall of estimated points (y_) to reference points (y) 57 | 58 | Args: 59 | ---- 60 | y_ (Signal): estimated points 61 | y (Signal): reference points 62 | window (float, optional): hit distance in seconds. Defaults to 0.5. 63 | 64 | Returns: 65 | ------- 66 | dict: precision, recall and fMeasure 67 | """ 68 | F, P, R = mir_eval.onset.f_measure(y.times, y_.times, window=window) 69 | return {"precision": P, "recall": R, "fMeasure": F} 70 | 71 | 72 | def myEvalCues(y_: Signal, y: Signal, window=0.5): 73 | """ 74 | Return the precision, recall and F-measure computed from the lists of times. 75 | The hit distance can be set with `window` 76 | """ 77 | if len(y_.times) == 0: 78 | return {"precision": 0, "recall": 0, "fMeasure": 0} 79 | 80 | # Probability that a predicted point is correct 81 | precision = hit(y_, y, window) / len(y_) 82 | # Proportion of the annotated points that are found 83 | recall = hit(y, y_, window) / len(y) 84 | if precision == 0 and recall == 0: 85 | fMeasure = 0 86 | else: 87 | fMeasure = 2 * (precision * recall) / (precision + recall) 88 | return {"precision": precision, "recall": recall, "fMeasure": fMeasure} 89 | 90 | 91 | def hit(y_pred: Signal, y_truth: Signal, window: float): 92 | """ return the number of detections within a small window of an annotation.
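    Example (illustrative): with window=0.5, predictions at [1.0, 5.0] and annotations at [1.2, 9.0],
    hit returns 1, since only the prediction at 1.0 falls within 0.5 s of an annotation.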
93 | TODO (without counting an annotation multiple times) 94 | 95 | Args: 96 | ---- 97 | y_pred (Signal): Estimations 98 | y_truth (Signal): Ground Truth 99 | window (float): hit threshold 100 | 101 | Returns: 102 | ------- 103 | int: the number of predictions within the window of an annotation 104 | """ 105 | if y_pred.times is None or y_truth.times is None: 106 | return 0 107 | 108 | if isinstance(y_truth, SparseSegmentSignal): 109 | return len([ 110 | a for a in y_pred.times if a is not None 111 | and any([1 for i, _ in enumerate(y_truth.times) if a >= y_truth.times[i][0] - window and a <= y_truth.times[i][1] + window]) 112 | ]) 113 | else: 114 | return len([a for a in y_pred.times if a is not None and any([np.abs(a - c) < window for c in y_truth.times])]) 115 | -------------------------------------------------------------------------------- /automix/featureExtraction/structure/msafProxy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Proxy to the music structure analysis framework 3 | """ 4 | 5 | import numpy as np 6 | 7 | from automix.featureExtraction.estimator import Estimator, Parameter 8 | from automix.model.classes.signal import SparseSignal 9 | 10 | 11 | class MsafProxy(Estimator): 12 | """ 13 | run MSAF: https://github.com/urinieto/msaf 14 | 15 | * inputPath: input feature containing the path to the audio file 16 | * outputSignal: name of the output feature 17 | * algorithm: name of the algorithm 18 | * feature: name of the feature to use 19 | combos presented in the article are: ["scluster", None], ["olda", None], ["sf", "cqt"] 20 | or 21 | feat_dict = { 22 | 'sf': 'pcp', 23 | 'levy': 'pcp', 24 | 'foote': 'pcp', 25 | 'siplca': '', 26 | 'olda': '', 27 | 'cnmf': 'pcp', 28 | '2dfmc': '' 29 | } 30 | """ 31 | 32 | def __init__(self, inputPath="path", outputSignal="msaf-scluster", algorithm="scluster", feature=None): 33 | super().__init__(parameters={"algorithm": algorithm, "feature": feature}, inputs=[inputPath], outputs=[outputSignal]) 34 | 35 | def predictOne(self, path): 36 | """ 37 | Returns the structure and labels from the specified algorithm 38 | Removes the first and last boundaries, which are the start and the end of the track 39 | """ 40 | import msaf 41 | if self.parameters["feature"].value is None: 42 | boundaries, labels = msaf.process(path, boundaries_id=self.parameters["algorithm"].value) 43 | else: 44 | boundaries, labels = msaf.process(path, 45 | boundaries_id=self.parameters["algorithm"].value, 46 | feature=self.parameters["feature"].value) 47 | return (SparseSignal(labels[1:], boundaries[1:-1]), ) 48 | -------------------------------------------------------------------------------- /automix/featureExtraction/structure/salientPointDetection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate the structure of the song with the mean square energy (MSE) 3 | """ 4 | 5 | import numpy as np 6 | 7 | from automix.featureExtraction.estimator import Estimator, Parameter 8 | from automix.model.classes.segment import Segment 9 | from automix.model.classes.signal import Signal 10 | 11 | from automix.featureExtraction.structure.eval import evalStructureMIR, segmentsToMirEval 12 | 13 | 14 | class SalientPointDetection(Estimator): 15 | # TODO: convert the low threshold to a percentile instead of an absolute value 16 | def __init__(self, 17 | parameterDiffThreshold=5, 18 | parameterRatioThreshold=0, 19 | parameterKMax=0, 20 | parameterzeroThreshold=0.000001, 21 | inputSignal="barMSE", 22 | outputBoundaries="boundaries", 23 |
outputLabels="labels", 24 | outputOnsets="onsets", 25 | cachingLevel=0, 26 | forceRefreshCache=False): 27 | """ 28 | Estimator computing the structure of a track based on salient points in the input signal 29 | the input signal should not be too noisy. It works best when the input signal is already averaged per beat or per downbeat 30 | 31 | Parameters 32 | ---------- 33 | diffThreshold (optional float): 34 | threshold setting the difference between the previous sample and the current one to label it as a segment. 35 | this difference is computed in terms of diffThreshold * (mean difference between each sample) 36 | 37 | kMax (optional int): 38 | If you don't want to use a factor for the salience detection, you can instead return the k most salient points. 39 | 40 | ratioThreshold (optional float): 41 | threshold setting the ratio difference between two points to consider them salient 42 | 43 | zeroThreshold (float): 44 | Indicating the threshold under which an amplitude should be considered as being zero. 45 | it is useful to mark the first segment, where the difference between samples is not big because the signal is rising from silence. 46 | """ 47 | self.parameters = { 48 | "diffThreshold": Parameter(parameterDiffThreshold), 49 | "ratioThreshold": Parameter(parameterRatioThreshold), 50 | "kMax": Parameter(parameterKMax), 51 | "zeroThreshold": Parameter(parameterzeroThreshold) 52 | } 53 | raise DeprecationWarning() 54 | self.inputs = [inputSignal] 55 | self.outputs = [outputBoundaries, outputLabels, outputOnsets] 56 | self.cachingLevel = cachingLevel 57 | self.forceRefreshCache = forceRefreshCache 58 | 59 | def _getAutomaticThreshold(self, signal, bins=10): 60 | """based on the method in https://ant-s4.unibw-hamburg.de/dafx/papers/ 61 | DAFX02_Duxbury_Sandler_Davis_note_onset_detection.pdf""" 62 | onsets, absOnsets = self._getOnsets(signal) 63 | count = np.histogram(absOnsets, bins=np.linspace(min(absOnsets), max(absOnsets), bins)) 64 | t = 1 65 | dCount = [count[0][i + t] - count[0][i] / t for i in range(len(count[0]) - t)] 66 | ddCount = [dCount[i + t] - dCount[i] / t for i in range(len(dCount) - t)] 67 | threshold = count[1][np.argmax(ddCount) + 2 * t] 68 | return self._getBoundaries(signal, absOnsets, threshold, self.parameters["zeroThreshold"].value), onsets 69 | 70 | def _getDiffThresholdBoundaries(self, signal): 71 | onsets, absOnsets = self._getOnsets(signal) 72 | threshold = self.parameters["diffThreshold"].value * np.mean(absOnsets) 73 | return self._getBoundaries(signal, absOnsets, threshold, self.parameters["zeroThreshold"].value), onsets 74 | 75 | def _getKMaxBoundaries(self, signal): 76 | onsets, absOnsets = self._getOnsets(signal) 77 | threshold = list(sorted(absOnsets))[-self.parameters["kMax"].value] 78 | return self._getBoundaries(signal, absOnsets, threshold, self.parameters["zeroThreshold"].value), onsets 79 | 80 | def _getOnsets(self, signal): 81 | """ 82 | Returns a first order difference of the signal and the absolute first order difference 83 | 84 | """ 85 | diff = np.diff(signal) 86 | return Signal(diff, times=signal.getTimes()[1:]), Signal(np.abs(diff), times=signal.getTimes()[1:]) 87 | 88 | def _getBoundaries(self, signal, onsets, threshold, zeroThreshold): 89 | """ 90 | Return the points with an onset above the threshold or samples after a zero amplitude sample.
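        Example (illustrative): with signal [0, 0, 0.5, 0.55, 1.0], onsets (absolute first-order
        differences) [0, 0.5, 0.05, 0.45] and threshold 0.4, the indexes [2, 4] are returned:
        index 2 because of the 0.5 jump (and the rise from silence), index 4 because of the 0.45 jump.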
91 | """ 92 | # i+1 because the onsets are shifted to the left relative to the signal 93 | return [ 94 | i + 1 for i, diff in enumerate(onsets) 95 | if diff >= threshold or (signal[i] < zeroThreshold and signal[i + 1] > self.parameters["zeroThreshold"].value) 96 | ] 97 | 98 | def _getRatioThresholdBoundaries(self, signal): 99 | onsets = Signal([signal[i + 1] / signal[i] if signal[i] != 0 else 10000 for i in range(len(signal) - 1)], 100 | times=signal.getTimes()[1:]) 101 | incTH = self.parameters["ratioThreshold"].value 102 | decTH = 1. / incTH 103 | return [i + 1 for i, ratio in enumerate(onsets) if ratio >= incTH or ratio <= decTH], onsets 104 | 105 | def predictOne(self, signal): 106 | """ 107 | get the structure from the saillant points 108 | 109 | Parameters: 110 | signal (Signal): Signal to create segments from 111 | """ 112 | # get the structure from the boundaries and the phase. 113 | if self.parameters["kMax"].value: 114 | sailantIndexes, onsets = self._getKMaxBoundaries(signal) 115 | elif self.parameters["diffThreshold"].value: 116 | sailantIndexes, onsets = self._getDiffThresholdBoundaries(signal) 117 | elif self.parameters["ratioThreshold"].value: 118 | sailantIndexes, onsets = self._getRatioThresholdBoundaries(signal) 119 | else: 120 | sailantIndexes, onsets = self._getAutomaticBoundaries(signal) 121 | 122 | # appending the last elements as the end of the signal 123 | sailantIndexes.append(len(signal) - 1) 124 | 125 | # quantize the boundaries to the closest loop grid from the rising 126 | # boundaries. 127 | # phase = 0 128 | # if self.parameters["loopLength"].value != -1: 129 | # phase = (np.argmax([ 130 | # np.sum([ 131 | # absMSADiff[i] 132 | # for i in range(j, len(absMSADiff), self.parameters["loopLength"].value) 133 | # ]) for j in range(self.parameters["loopLength"].value) 134 | # ]) + 1) % self.parameters["loopLength"].value 135 | 136 | # boundaries = quantization.quantize( 137 | # range(phase, len(grid), self.parameters["loopLength"].value), boundaries) 138 | 139 | # phase = 3 if phase == 0 else phase - 1 140 | 141 | # convert the boundaries to the real times 142 | # Because of the anacrusis which is not present in the grid, the 143 | # boundaries indexes are shifted by one 144 | # [grid[i - 1] for i in boundaries] 145 | boundaries = signal.getTimes(sailantIndexes) 146 | # same with the phase which goes from 0 to three 147 | 148 | # Add the labels 149 | segmentedSignal = [signal.values[sailantIndexes[i]:sailantIndexes[i + 1]] for i in range(len(sailantIndexes) - 1)] 150 | labels = ["Start"] + [ 151 | Segment.getLabel(segment, np.mean(signal.values), i, len(segmentedSignal)) 152 | for i, segment in enumerate(segmentedSignal) 153 | ] 154 | 155 | # segments = [ 156 | # Segment( 157 | # labels[i], 158 | # start=boundariesTime[i], 159 | # barStart=boundaries[i], 160 | # end=boundariesTime[i + 1], 161 | # barEnd=boundaries[i + 1], 162 | # duration=boundariesTime[i + 1] - boundariesTime[i], 163 | # barDuration=boundaries[i + 1] - boundaries[i], 164 | # onsetIntensity=msaDiff[boundaries[i] - 1] / max(msaDiff)) 165 | # for i in range(len(boundaries) - 1) 166 | # ] 167 | # TODO: why not use that : boundaries = Signal(onsetIntensity, times=[boundariesTime]) 168 | 169 | # loopBoundaries = [grid[i] for i in range(phase, len(grid), self.parameters["loopLength"].value)] 170 | return (boundaries, labels, onsets) 171 | 172 | def evaluate(self, X, y): 173 | y_ = [segment.start for segment in self.predict(X)] 174 | return [evalStructureMIR(segmentsToMirEval(y_[i]), 
segmentsToMirEval(y[i])) for i in range(len(X))] 175 | -------------------------------------------------------------------------------- /automix/featureExtraction/structure/spectralClustering.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate the structure of the song with the mean square energy (MSE) 3 | """ 4 | 5 | import numpy as np 6 | 7 | from automix.featureExtraction.estimator import Estimator, Parameter 8 | from automix.model.classes.segment import Segment 9 | from automix.model.classes.signal import Signal 10 | 11 | from . import spectralClusteringSegmenter as segmenter 12 | 13 | 14 | class SpectralClustering(Estimator): 15 | # TODO: covert the low threshold in percentile instead of absolute value 16 | def __init__(self, input="path", output="spectralClustering", cachingLevel=0, forceRefreshCache=False): 17 | """ 18 | """ 19 | self.inputs = [input] 20 | self.outputs = [output] 21 | self.parameters = {} 22 | self.cachingLevel = cachingLevel 23 | self.forceRefreshCache = forceRefreshCache 24 | 25 | def predictOne(self, path: str): 26 | 27 | X_cqt, X_timbre, beat_intervals = segmenter.features(path) 28 | 29 | boundaries, beat_intervals, labels = segmenter.lsd(X_cqt, X_timbre, beat_intervals, {"num_types": False}) 30 | result = Signal(labels, times=[beat_intervals[i][0] for i in boundaries[:-1]], sparse=True) 31 | return (result,) 32 | 33 | -------------------------------------------------------------------------------- /automix/featureExtraction/vocalSeparation/__init__.py: -------------------------------------------------------------------------------- 1 | from .librosaVocalSeparation import LibrosaVocalSeparation 2 | from .vocalMelodyExtraction import VocalMelodyExtraction 3 | -------------------------------------------------------------------------------- /automix/featureExtraction/vocalSeparation/librosaVocalSeparation.py: -------------------------------------------------------------------------------- 1 | """ 2 | container for the vocal separation estimator 3 | """ 4 | 5 | import librosa 6 | import librosa.display 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | 10 | from automix.featureExtraction.estimator import Estimator, Parameter 11 | from automix.model.classes.signal import Signal 12 | 13 | 14 | class LibrosaVocalSeparation(Estimator): 15 | """ 16 | estimator based on http://librosa.github.io/librosa/auto_examples/plot_vocal_separation.html?highlight=vocalness 17 | """ 18 | 19 | def __init__(self, 20 | inputPath="samples", 21 | outputVocals="vocals", 22 | outoutBackground="background", 23 | cachingLevel=2, 24 | forceRefreshCache=False): 25 | self.inputs = [inputPath] 26 | self.outputs = [outputVocals, outoutBackground] 27 | self.parameters = {} 28 | self.cachingLevel = cachingLevel 29 | self.forceRefreshCache = forceRefreshCache 30 | 31 | def predictOne(self, samples: Signal): 32 | """ 33 | """ 34 | y, sr = samples.values, samples.sampleRate 35 | 36 | # And compute the spectrogram magnitude and phase 37 | S_full, phase = librosa.magphase(librosa.stft(y)) 38 | 39 | hopLength = 2048 / 4 40 | newSampleRate = sr / hopLength 41 | 42 | # We'll compare frames using cosine similarity, and aggregate similar frames 43 | # by taking their (per-frequency) median value. 44 | # 45 | # To avoid being biased by local continuity, we constrain similar frames to be 46 | # separated by at least 2 seconds. 
47 | # 48 | # This suppresses sparse/non-repetetitive deviations from the average spectrum, 49 | # and works well to discard vocal elements. 50 | S_filter = librosa.decompose.nn_filter(S_full, 51 | aggregate=np.median, 52 | metric='cosine', 53 | width=int(librosa.time_to_frames(2, sr=sr))) 54 | 55 | # The output of the filter shouldn't be greater than the input 56 | # if we assume signals are additive. Taking the pointwise minimium 57 | # with the input spectrum forces this. 58 | S_filter = np.minimum(S_full, S_filter) 59 | 60 | # We can also use a margin to reduce bleed between the vocals and instrumentation masks. 61 | # Note: the margins need not be equal for foreground and background separation 62 | margin_i, margin_v = 2, 10 63 | power = 2 64 | 65 | mask_i = librosa.util.softmask(S_filter, margin_i * (S_full - S_filter), power=power) 66 | 67 | mask_v = librosa.util.softmask(S_full - S_filter, margin_v * S_filter, power=power) 68 | 69 | # Once we have the masks, simply multiply them with the input spectrum 70 | # to separate the components 71 | S_foreground = mask_v * S_full 72 | S_background = mask_i * S_full 73 | 74 | # # sphinx_gallery_thumbnail_number = 2 75 | # idx = slice(*librosa.time_to_frames([30, 35], sr=sr)) 76 | # plt.figure(figsize=(12, 8)) 77 | # plt.subplot(3, 1, 1) 78 | # librosa.display.specshow(librosa.amplitude_to_db(S_full[:, idx], ref=np.max), 79 | # y_axis='log', sr=sr) 80 | # plt.title('Full spectrum') 81 | # plt.colorbar() 82 | 83 | # plt.subplot(3, 1, 2) 84 | # librosa.display.specshow(librosa.amplitude_to_db(S_background[:, idx], ref=np.max), 85 | # y_axis='log', sr=sr) 86 | # plt.title('Background') 87 | # plt.colorbar() 88 | # plt.subplot(3, 1, 3) 89 | # librosa.display.specshow(librosa.amplitude_to_db(S_foreground[:, idx], ref=np.max), 90 | # y_axis='log', x_axis='time', sr=sr) 91 | # plt.title('Foreground') 92 | # plt.colorbar() 93 | # plt.tight_layout() 94 | # plt.show() 95 | 96 | return (Signal(S_foreground.T, sampleRate=newSampleRate), Signal(S_background.T, sampleRate=newSampleRate)) 97 | -------------------------------------------------------------------------------- /automix/featureExtraction/vocalSeparation/vocalMelodyExtraction.py: -------------------------------------------------------------------------------- 1 | """ 2 | container for the vocal separation estimator 3 | """ 4 | 5 | import librosa 6 | import librosa.display 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | from pkg_resources import resource_filename 10 | import importlib 11 | 12 | from automix.featureExtraction.estimator import Estimator, Parameter 13 | from automix.model.classes.signal import Signal 14 | 15 | 16 | class VocalMelodyExtraction(Estimator): 17 | """ 18 | estimator based on https://github.com/s603122001/Vocal-Melody-Extraction 19 | """ 20 | 21 | def __init__(self, 22 | inputPath="path", 23 | outputClassification="vocals", 24 | outputPitch="vocalsMelody", 25 | parameterModel="Seg", 26 | cachingLevel=0, 27 | forceRefreshCache=False): 28 | self.inputs = [inputPath] 29 | self.outputs = [outputClassification, outputPitch] 30 | self.parameters = {"model": Parameter(parameterModel)} 31 | self.cachingLevel = cachingLevel 32 | self.forceRefreshCache = forceRefreshCache 33 | 34 | def predictOne(self, path: str): 35 | """ 36 | method copied from the main file in the project 37 | """ 38 | # pkg_resources.() 39 | # project = importlib.import_module("vendors.Vocal-Melody-Extraction.project") 40 | from project.MelodyExt import feature_extraction 41 | from project.utils 
import load_model, save_model, matrix_parser 42 | from project.test import inference 43 | from project.model import seg, seg_pnn, sparse_loss 44 | from project.train import train_audio 45 | 46 | # load wav 47 | song = path 48 | 49 | # Feature extraction 50 | feature = feature_extraction(song) 51 | feature = np.transpose(feature[0:4], axes=(2, 1, 0)) 52 | 53 | # load model 54 | 55 | model = load_model( 56 | resource_filename(__name__, 57 | "../../../vendors/Vocal-Melody-Extraction/Pretrained_models/" + self.parameters["model"].value)) 58 | batch_size_test = 10 59 | # Inference 60 | print(feature[:, :, 0].shape) 61 | extract_result = inference(feature=feature[:, :, 0], model=model, batch_size=batch_size_test) 62 | 63 | # Output 64 | r = matrix_parser(extract_result) 65 | return (Signal(r[:, 0], sampleRate=50), Signal(r[:, 1], sampleRate=50)) 66 | -------------------------------------------------------------------------------- /automix/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MZehren/Automix/dfaa00a9e7c5c0938c0a9d275c07f3a3e5f87e43/automix/model/__init__.py -------------------------------------------------------------------------------- /automix/model/classes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MZehren/Automix/dfaa00a9e7c5c0938c0a9d275c07f3a3e5f87e43/automix/model/classes/__init__.py -------------------------------------------------------------------------------- /automix/model/classes/deck.py: -------------------------------------------------------------------------------- 1 | """ 2 | Deck container 3 | """ 4 | import copy 5 | from typing import List 6 | 7 | from automix.model.classes.track import Track 8 | 9 | 10 | class Deck(object): 11 | """ 12 | Data structure to contain a Deck with effects. 13 | It corresponds to tracks in Reaper 14 | """ 15 | 16 | def __init__(self, name: str = "", tracks: List[Track] = None): 17 | self.tracks = tracks if tracks else [] 18 | self.name = name 19 | 20 | self.FX = {"gainPt": [], "volPt": [], "lowPt": [], "midPt": [], "highPt": [], "hpfPt": [], "lpfPt": []} 21 | 22 | def append(self, track: Track): 23 | """ 24 | Add a track to the deck 25 | """ 26 | self.tracks.append(track) 27 | 28 | def updateFX(self): 29 | """ 30 | Apply the track effects to the Deck 31 | """ 32 | for track in self.tracks: 33 | for FX, values in track.FX.items(): 34 | if FX not in self.FX: 35 | self.FX[FX] = [] 36 | for pt in values: 37 | newPt = copy.copy(pt) 38 | newPt.position = track.getDeckTime(pt.position) 39 | self.FX[FX].append(newPt) 40 | -------------------------------------------------------------------------------- /automix/model/classes/lazyProperty.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | class lazy_property(object): 4 | ''' 5 | meant to be used for lazy evaluation of an object attribute. 6 | property should represent non-mutable data, as it replaces itself. 
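(On first access the result of fget is stored on the instance under the same attribute name via setattr, so later lookups hit the cached instance attribute and bypass this descriptor.)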
7 | from https://stackoverflow.com/questions/3012421/python-memoising-deferred-lookup-property-decorator 8 | ''' 9 | 10 | def __init__(self, fget): 11 | self.fget = fget 12 | 13 | # copy the getter function's docstring and other attributes 14 | functools.update_wrapper(self, fget) 15 | 16 | def __get__(self, obj, cls): 17 | if obj is None: 18 | return self 19 | 20 | value = self.fget(obj) 21 | setattr(obj, self.fget.__name__, value) 22 | return value 23 | -------------------------------------------------------------------------------- /automix/model/classes/point.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List 3 | 4 | 5 | class Shape(Enum): 6 | """ 7 | Enum for the shape available for the points 8 | 0: linear, 1: square, 2: slow start/end, 3: fast start, 4: fast end, 5: bezier 9 | """ 10 | LINEAR = 0 11 | SQUARE = 1 12 | SIGMOID = 2 13 | FASTSTART = 3 14 | FASTEND = 4 15 | BEZIER = 5 16 | 17 | def jsonEncode(self): 18 | return str(self) 19 | 20 | 21 | class Point(object): 22 | """ 23 | Represents a Reaper's point for automation 24 | """ 25 | 26 | def __init__(self, position=0, amplitude=0, shape=Shape(5), curve=0): 27 | """ 28 | Position in s 29 | Amplitude in dB 30 | Shape as described in Shape Enum 31 | curve only implemented for the bezier shape. (-1 = fast start, 0 = linear, 1 = fast end) 32 | """ 33 | self.position = position 34 | self.amplitude = amplitude 35 | self.shape = shape 36 | self.curve = curve -------------------------------------------------------------------------------- /automix/model/eval/__init__.py: -------------------------------------------------------------------------------- 1 | import mir_eval 2 | import numpy as np 3 | 4 | 5 | def evalTempoMIR(trackEvaluated, trackGT): 6 | return mir_eval.tempo.detection(np.array([trackGT.tempo, trackGT.tempo]), 0, np.array([trackEvaluated.tempo, trackEvaluated.tempo]), tol=0.1) 7 | 8 | 9 | def evalTempoMadmom(trackEvaluated, trackGT, window=0.04): 10 | F1 = trackEvaluated.tempo > trackGT.tempo * \ 11 | (1-window) and trackEvaluated.tempo < trackGT.tempo * (1+window) 12 | 13 | F2 = (trackEvaluated.tempo > trackGT.tempo * (1-window) and trackEvaluated.tempo < trackGT.tempo * (1+window)) \ 14 | or (trackEvaluated.tempo > trackGT.tempo/3 * (1-window) and trackEvaluated.tempo < trackGT.tempo/3 * (1+window)) \ 15 | or (trackEvaluated.tempo > trackGT.tempo/2 * (1-window) and trackEvaluated.tempo < trackGT.tempo/2 * (1+window)) \ 16 | or (trackEvaluated.tempo > trackGT.tempo*2 * (1-window) and trackEvaluated.tempo < trackGT.tempo*2 * (1+window)) \ 17 | or (trackEvaluated.tempo > trackGT.tempo*3 * (1-window) and trackEvaluated.tempo < trackGT.tempo*3 * (1+window)) 18 | 19 | F3 = (trackEvaluated.tempo > trackGT.tempo * (1-window) and trackEvaluated.tempo < trackGT.tempo * (1+window)) \ 20 | or (trackEvaluated.tempo > trackGT.tempo/3 * (1-window) and trackEvaluated.tempo < trackGT.tempo/3 * (1+window)) \ 21 | or (trackEvaluated.tempo > trackGT.tempo/2 * (1-window) and trackEvaluated.tempo < trackGT.tempo/2 * (1+window)) \ 22 | or (trackEvaluated.tempo > trackGT.tempo*2 * (1-window) and trackEvaluated.tempo < trackGT.tempo*2 * (1+window)) \ 23 | or (trackEvaluated.tempo > trackGT.tempo*3 * (1-window) and trackEvaluated.tempo < trackGT.tempo*3 * (1+window)) \ 24 | or (trackEvaluated.tempo > trackGT.tempo*2/3 * (1-window) and trackEvaluated.tempo < trackGT.tempo*2/3 * (1+window)) \ 25 | or (trackEvaluated.tempo > trackGT.tempo*3/4 * (1-window) and 
trackEvaluated.tempo < trackGT.tempo*3/4 * (1+window)) \ 26 | or (trackEvaluated.tempo > trackGT.tempo*4/3 * (1-window) and trackEvaluated.tempo < trackGT.tempo*4/3 * (1+window)) \ 27 | or (trackEvaluated.tempo > trackGT.tempo*5/4 * (1-window) and trackEvaluated.tempo < trackGT.tempo*5/4 * (1+window)) 28 | 29 | return F1, F2, F3 30 | -------------------------------------------------------------------------------- /automix/model/eval/cueEval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Do the peakpicking and compute the metrics 3 | """ 4 | import numpy as np 5 | 6 | from automix.featureExtraction.lowLevel import PeakPicking 7 | from automix.model.classes import Signal 8 | 9 | 10 | def getMetricsSparse(A, C, minDistance=0.5): 11 | """ 12 | return the precision, recall, f, in function of list of times. 13 | You can specify the minDistance 14 | """ 15 | if not A: 16 | return {"precision": 0, "recall": 0, "fMeasure": 0} 17 | 18 | precision = len([a for a in A if any([np.abs(a - c) < minDistance for c in C])]) / len(A) #probability that A is correct 19 | recall = len([c for c in C if any([np.abs(a - c) < minDistance for a in A])]) / len(C) #% of C we find 20 | fMeasure = 2 * (precision * recall) / (precision + recall) 21 | return {"precision": precision, "recall": recall, "fMeasure": fMeasure} 22 | 23 | 24 | def getMetricsDense(dfA, dfC): 25 | """ 26 | Return metrics from binary list such as [0,1,0], [1,1,0] where the index should be the same events 27 | dfA are the samples where the antecedent are True or False 28 | dfC are the samples where the consequents are True or False 29 | """ 30 | support = dfA.sum() 31 | precision = (dfA & dfC).sum() / dfA.sum() #P(A&C)/P(A) or P(C|A) 32 | recall = (dfA & dfC).sum() / dfC.sum() 33 | lift = precision / (dfC.sum() / len(dfA)) 34 | fMeasure = 2 * (precision * recall / (precision + recall)) 35 | return { 36 | "support": round(support, 2), 37 | "precision": round(precision, 2), 38 | "recall": round(recall, 2), 39 | "lift": round(lift, 2), 40 | "fMeasure": round(fMeasure, 2) 41 | } 42 | 43 | 44 | def findPhaseLocal(signal: Signal, grid: Signal, period: int, toleranceWindow=0): 45 | """ 46 | find the phase of the signal based on it's amplitude at the grid positions and the number of peaks 47 | - signal: works best with a discrete signal as no aglomeration is done 48 | - grid: positions of the beats 49 | - period: the periodicity to test 50 | - tolerance window: if not at 0, returns the closest value in the signal to the grid, within the tolerance window 51 | 52 | test: 53 | # result = findPhase(Signal(np.ones(5), times=np.array([0, 4, 8, 9, 12])+1), Signal(np.ones(16), times=range(16)), period=4) 54 | # print(result) = 1 55 | """ 56 | phases = [] 57 | for phase in range(period): 58 | values = [signal.getValue(grid.times[i], toleranceWindow=0.1) for i in range(phase, len(grid), period)] 59 | values = [v for v in values if v is not None] 60 | phases.append((np.sum(values) * len(values))) 61 | 62 | bestPhase = np.argmax(phases) 63 | return bestPhase 64 | 65 | 66 | def getPhase(track, features, period): 67 | """ 68 | Get the phase of the track depending on all the features specified and the period 69 | """ 70 | from automix.utils.quantization import clusterValues 71 | 72 | phasePerFeature = [] 73 | for feature in features: 74 | phasePerFeature.append(findPhaseLocal(feature, track.features["strongBeats"], period=period)) 75 | counts = np.bincount(phasePerFeature) 76 | # print(phases, counts, np.argmax(counts)) 77 
| return np.argmax(counts) 78 | 79 | 80 | def getScore(features, 81 | aggregation="independant", 82 | relativeThreshold=0.3, 83 | top=3, 84 | returnCues=False, 85 | minDistancePeak=32, 86 | minDistanceCluster=0, 87 | period=2): 88 | """ 89 | Compute for each track and each feature provided, the top k peaks. Then aggregate them to compute the score, 90 | - Feature top: use only the n first peaks of each feature 91 | - returnCues: Return the peaks on not the score 92 | - minDistance: aggregate the peaks under minDistance 93 | - aggregation: Concatenate the peaks. Can either extract the peaks from all the features independently (independant) 94 | Or take the peaks in the cruve after multiplying all the features element wise (multiply) 95 | """ 96 | from automix.utils.quantization import clusterValues, findPhase, quantize 97 | 98 | cues = [] 99 | gtCues = [] 100 | result = {} 101 | 102 | for i, track in enumerate(tracks): 103 | # for feature in features: 104 | # #Snap the peaks to the closest strong beat 105 | # newCues = quantize(track.features["strongBeats"], newCues) 106 | 107 | #Concatenate all the peaks from all the features 108 | peakSignals = [] 109 | pp = PeakPicking(parameterMinDistance=minDistancePeak, parameterRelativeThreshold=relativeThreshold) 110 | if aggregation == "independant": 111 | peakSignals = [pp.predictOne(track.features[feature])[0] for feature in features] 112 | newCues = np.concatenate([signal.times for signal in peakSignals]) 113 | 114 | elif aggregation == "multiply": 115 | #If the aggregation is set to multipl, the features shouldn't be the peaks but the novelty 116 | newCurve = np.ones(len(track.features[features[0]].values)) 117 | for feature in features: 118 | newCurve = np.multiply(newCurve, track.features[feature].values) 119 | peakSignals = pp.predictOne(Signal(newCurve, times=track.features[features[0]].times)) 120 | newCues = peakSignals[0].times 121 | 122 | for cue in newCues: 123 | if cue not in track.features["strongBeats"].times: 124 | print("cue not on strongbeat", cue) 125 | 126 | # #Snap the peaks to the closest strong beat 127 | # newCues = quantize(track.features["strongBeats"], newCues) 128 | 129 | #Cluster the peaks to remove close outliers 130 | if minDistanceCluster: 131 | newCues = clusterValues(newCues, minDistance=minDistanceCluster) 132 | else: 133 | newCues = list(set(newCues)) 134 | newCues.sort() 135 | 136 | #Identify the beat-period 137 | if period and newCues: 138 | phase = getPhase(track, peakSignals, period) 139 | inPhase = track.features["strongBeats"].times[phase:-1:period] 140 | newCues = [cue for cue in newCues if cue in inPhase] 141 | 142 | firstK = newCues[:top] 143 | cues += firstK 144 | 145 | result[track.name] = { 146 | "cues": firstK, #Cues candidates 147 | "cuesFeature": { 148 | features[j]: len([1 for t in signal.times if t in firstK]) / len(firstK) if len(firstK) else 0 149 | for j, signal in enumerate(peakSignals) 150 | }, 151 | } 152 | 153 | if any(gttracks): 154 | gtCues += gttracks[i].features["boundaries"] 155 | result[track.name]["gtCues"] = gttracks[i].features["boundaries"] #Cuesannotated 156 | result[track.name]["gtCuesFeature"] = { 157 | features[j]: len([ 158 | 1 for t in signal.times 159 | if t in firstK and any([np.abs(t - gtT) <= 0.5 for gtT in gttracks[i].features["boundaries"]]) 160 | ]) / len(gttracks[i].features["boundaries"]) 161 | for j, signal in enumerate(peakSignals) 162 | } 163 | 164 | if returnCues: 165 | return result 166 | result = getMetricsSparse(cues, gtCues) 167 | # print(len(cues), 
len(gtCues), result) 168 | return result 169 | -------------------------------------------------------------------------------- /automix/model/inputOutput/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MZehren/Automix/dfaa00a9e7c5c0938c0a9d275c07f3a3e5f87e43/automix/model/inputOutput/__init__.py -------------------------------------------------------------------------------- /automix/model/inputOutput/downloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MZehren/Automix/dfaa00a9e7c5c0938c0a9d275c07f3a3e5f87e43/automix/model/inputOutput/downloader/__init__.py -------------------------------------------------------------------------------- /automix/model/inputOutput/downloader/downloaders.py: -------------------------------------------------------------------------------- 1 | """ 2 | contains all the functions to download fromn streaming services 3 | """ 4 | 5 | from urllib.parse import urlparse, parse_qs 6 | from bs4 import BeautifulSoup 7 | 8 | from subprocess import PIPE, Popen 9 | import youtube_dl 10 | 11 | 12 | def oneThousandOneMedia(mediaJson, folder): 13 | """ 14 | Parse the JSON returned by getMedia function of 1001tracklist 15 | """ 16 | if "player" in mediaJson and "soundcloud" in mediaJson["player"]: 17 | return downloadSoundCloud(BeautifulSoup(mediaJson["player"], 'html.parser').find("iframe")["src"], folder) 18 | elif "player" in mediaJson and "youtube" in mediaJson["player"]: 19 | print("YOUTUBE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") 20 | return downloadYoutube(url=BeautifulSoup(mediaJson["player"], 'html.parser').find("iframe")["src"], folder=folder) 21 | 22 | 23 | def downloadSoundCloud(url, folder): 24 | """ 25 | Download the soundcloud music from the url 26 | either https://w.soundcloud.com/player/?url=https://api.soundcloud.com/tracks/419898048 27 | &show_artwork=true&color=D6DCFE 28 | or https://api.soundcloud.com/tracks/419898048 29 | in the specified folder 30 | 31 | returns the filename 32 | """ 33 | # parse the url to see if it contains a query 34 | urlParsed = urlparse(url) 35 | queryParsed = parse_qs(urlParsed.query) 36 | if 'url' in queryParsed and queryParsed['url']: 37 | url = queryParsed['url'][0] 38 | 39 | # do the call 40 | # TODO: enhance the filename parsing. 
it's not working correctly 41 | args = ['scdl', '-l', url, '--path', folder, '-c', '--hide-progress'] # '--addtimestamp' 42 | process = Popen(args, stderr=PIPE) 43 | output = process.stderr.read() 44 | splitedOutput = output.decode("utf-8").split("\n") 45 | if len(splitedOutput) == 7: # just downloaded 46 | path = splitedOutput[-3][5:-12] 47 | else: # already downloaded 48 | path = splitedOutput[-2][12:-25] + ".mp3" 49 | 50 | return path 51 | 52 | 53 | def downloadYoutube(youTubeID="", url="", folder="", path=""): 54 | """ 55 | Download youtube video fron an ID 56 | """ 57 | url = "https://www.youtube.com/watch?v=" + youTubeID if youTubeID else url 58 | outtmpl = folder + '%(title)s.%(ext)s' if folder else path + ".%(ext)s" 59 | 60 | # youTube = YouTube(url) 61 | # thisStream = youTube.streams.filter( 62 | # only_audio=True).order_by('resolution').desc().first() 63 | # path = folder + youTube.title + ".mp4" 64 | 65 | # if thisStream: 66 | # try: #retrieve the cache 67 | # with open(path, 'r'): 68 | # pass 69 | # except IOError: 70 | # thisStream.download(folder) 71 | ydl_opts = { 72 | 'format': 'bestaudio/best', 73 | 'postprocessors': [{ 74 | 'key': 'FFmpegExtractAudio', 75 | 'preferredcodec': 'mp3', 76 | 'preferredquality': '192', 77 | }], 78 | 'outtmpl': outtmpl, 79 | 'gettitle': 1 80 | } 81 | with youtube_dl.YoutubeDL(ydl_opts) as ydl: 82 | infos = ydl.extract_info(url) 83 | return str(infos[u"title"]) + ".mp3" # Track(name=yt.title, path=path) 84 | 85 | # print(downloadYoutube(youTubeID="vPIaMZSmLc4", folder="./")) -------------------------------------------------------------------------------- /automix/model/inputOutput/downloader/mixesDBProxy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Enables us to download from mixesdb.com 3 | """ 4 | import urllib2 5 | import re 6 | 7 | from model.inputOutput import reaperProxy, downloaders 8 | from model.classes.deck import Deck 9 | 10 | 11 | def getMixesDBMix(url, outputFolder): 12 | """ 13 | create a reaper file in the outputfolder from the mix. 
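Every data-youtubeid found on the page is downloaded with downloaders.downloadYoutube and the resulting tracks are written to a mix.RPP Reaper project.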
14 | the url should be https://www.mixesdb.com/w/2018-05-12_-_Camelphat_-_Essential_Mix 15 | """ 16 | # parse the url 17 | page = urllib2.urlopen(url).read() 18 | 19 | tracks = [] 20 | for ytId in re.findall("data-youtubeid=\\\\\"(.{11})\\\\\"", page): 21 | tracks.append(downloaders.downloadYoutube(ytId, outputFolder)) 22 | tracks[-1].position = 101 23 | 24 | with open("mix.RPP", 'w') as outfile: 25 | outfile.write( 26 | reaperProxy.getReaperProject(decks=[Deck(tracks=tracks)])) 27 | 28 | 29 | # getMixesDBMix("https://www.mixesdb.com/w/2018-05-12_-_Camelphat_-_Essential_Mix", 30 | # "annotations/mixes/House/") 31 | -------------------------------------------------------------------------------- /automix/model/inputOutput/downloader/oneThousandOnetracklistsProxy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for downloading tracklist from https://www.1001tracklists.com/ 3 | """ 4 | import json 5 | import logging 6 | import re 7 | import time 8 | from urllib.request import urlopen 9 | 10 | from bs4 import BeautifulSoup 11 | 12 | from automix.model.classes.track import Track 13 | from automix.model.inputOutput.serializer import DBSerializer 14 | 15 | 16 | # from automix.model.inputOutput.downloader import downloadSo, downloadYoutube 17 | class Container(dict): 18 | """ 19 | Dictionnary with a add method not removing values 20 | # TODO replace with a default list dictionnary 21 | """ 22 | 23 | def add(self, key, value): 24 | if key in self: 25 | if not isinstance(self[key], list): 26 | self[key] = [self[key]] 27 | self[key].append(value) 28 | 29 | else: 30 | self[key] = value 31 | 32 | 33 | def scrapOverview(rangeToParse=range(0, 100), sleep=6): 34 | """ 35 | Parse the page containing all the mixes 36 | """ 37 | db = DBSerializer() 38 | for i in rangeToParse: 39 | logging.info("page :" + str(i)) 40 | url = "https://www.1001tracklists.com/index" + str(i) + ".html" 41 | page = urlopen(url) 42 | soup = BeautifulSoup(page, 'html.parser') 43 | for soupTracklist in soup.findAll("div", {"class": "tlLink"}): 44 | tlUrl = "https://www.1001tracklists.com" + soupTracklist.find("a")["href"] 45 | if db.exist({"_id": tlUrl}): 46 | logging.info(tlUrl + " already exist") 47 | continue 48 | 49 | mix = scrapTrackList(url=tlUrl) 50 | 51 | try: 52 | id = tlUrl #TODO: find a better ID, the url is based on the name which is subject to change 53 | db.insert(mix, id) 54 | logging.info(" - " + id[-20:] + " inserted") 55 | except Exception as e: 56 | logging.info(" - " + id[-20:] + "not inserted") 57 | logging.warn(e) 58 | 59 | time.sleep(sleep) 60 | 61 | 62 | def scrapTrackList(url="https://www.1001tracklists.com/tracklist/20gg7q7t/wankelmut-1live-dj-session-2019-04-20.html"): 63 | """ 64 | Parse the page containing the whole tracklist 65 | """ 66 | # init 67 | page = urlopen(url) 68 | soup = BeautifulSoup(page, 'html.parser') 69 | 70 | # Parse the mix data 71 | # TODO: add date of recording instead of date of publication 72 | mix = scrapMeta(soup.find("div", {"itemtype": "http://schema.org/MusicPlaylist"})) 73 | mix.add("medias", scrapMediaMix(soup.find("div", {"id": "mediaItems"}))) 74 | 75 | # Parse the tracklist 76 | tracks = [] 77 | for soupTrack in soup.findAll("div", {"itemtype": "http://schema.org/MusicRecording"}): 78 | if "tgHid" in soupTrack.parent.parent["class"]: 79 | continue # TODO: do we remove the hiden tracks ? 
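# collect every itemprop/content pair from the recording's meta tags into a Container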
80 | track = Container() 81 | for soupTrackMeta in soupTrack.findAll("meta"): 82 | track.add(soupTrackMeta["itemprop"], soupTrackMeta["content"]) 83 | 84 | track.add("medias", scrapMediaTrack(soupTrack)) 85 | 86 | order = soupTrack.parent.parent.find("td", {"class": "left"}).findAll("span") 87 | if len(order): 88 | track.add("order", order[-1].text) 89 | if track["order"] == "w/" and len(tracks): 90 | track["order"] = tracks[-1]["order"] 91 | try: 92 | track["order"] = int(track["order"]) 93 | except ValueError: 94 | track["order"] = "" 95 | else: 96 | # Track has been likely removed 97 | continue 98 | 99 | track.add("position", soupTrack.parent.parent.find("td", {"class": "left"}).find("div").text) 100 | 101 | tracks.append(track) 102 | # for all tracks in the track list: 103 | # mixedTracks = [] 104 | # playerSelector = CSSSelector('div[class="s32"]') 105 | # itemSelector = CSSSelector('table.tl tr.tlpItem') 106 | # playSelector = CSSSelector('div[title="play position"]') 107 | # nameSelector = CSSSelector('meta[itemprop="name"]') 108 | 109 | mix.add("tracks", tracks) 110 | return mix 111 | 112 | 113 | def scrapMeta(soup): 114 | """ 115 | return the metadata from any element inside soup: 116 | Look for: 117 | """ 118 | container = Container() 119 | for soupMeta in soup.findAll("meta"): 120 | prop = soupMeta["itemprop"] 121 | value = soupMeta["content"] 122 | container.add(prop, value) 123 | return container 124 | 125 | 126 | def scrapMediaMix(soup): 127 | """ 128 | Return the media links from a mix (different than from a track) 129 | Soup should be a "mediaItemsTop" 130 | calls get_medialink.php?idMedia 131 | """ 132 | medias = [] 133 | for soupDiv in soup.findAll("div"): 134 | try: 135 | if soupDiv.has_attr("data-idmedia"): 136 | requestUrlUrl = "https://www.1001tracklists.com/ajax/get_medialink.php?idMedia=" + soupDiv[ 137 | "data-idmedia"] + "&dontHide=true&showInline=true" 138 | jsonResult = json.loads(urlopen(requestUrlUrl).read())["data"][0] 139 | medias.append(jsonResult) 140 | except Exception as e: 141 | logging.warn(e) 142 | return medias 143 | 144 | 145 | def scrapMediaTrack(soup): 146 | """ 147 | Scrap the media link for Youtube, soundcloud, etc, 148 | input: soup output from a track element in a playlist 149 | TODO: calls get_medialink.php?idObject=...&idItem=... 
in another function 150 | """ 151 | onClickContent = soup.parent.find("div", {"class": "addMedia"}) 152 | # If it's a track, then the media should be in a addMedia div 153 | try: 154 | idItem = re.search("idItem: ([0-9]+)", str(onClickContent)).group(1) 155 | idObject = (re.search("idObject: ([0-9]+)", str(onClickContent)).group(1)) 156 | return {"idItem": idItem, "idObject": idObject} 157 | except AttributeError: 158 | return 159 | 160 | def requestMediaLinks(media): 161 | """ 162 | from the media information in 1001 website ({"idItem": idItem, "idObject": idObject}) 163 | request the links with the AJAX request to get_medialink.php 164 | """ 165 | if isinstance(media, str): 166 | media = media.replace("'", '"') 167 | media = json.loads(media) 168 | requestUrlUrl = "https://www.1001tracklists.com/ajax/get_medialink.php?idObject=" + media["idObject"] + "&idItem=" + media["idItem"] 169 | jsonResult = json.loads(urlopen(requestUrlUrl).read()) 170 | 171 | # soundcloud 172 | if 'success' in jsonResult and jsonResult["success"]: 173 | return jsonResult["data"] 174 | 175 | 176 | if __name__ == '__main__': 177 | logging.getLogger().setLevel(logging.INFO) 178 | scrapOverview(rangeToParse=range(1, 1000)) 179 | scrapOverview(rangeToParse=range(200, 1000)) 180 | -------------------------------------------------------------------------------- /automix/model/inputOutput/serializer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MZehren/Automix/dfaa00a9e7c5c0938c0a9d275c07f3a3e5f87e43/automix/model/inputOutput/serializer/__init__.py -------------------------------------------------------------------------------- /automix/model/inputOutput/serializer/dbSerializer.py: -------------------------------------------------------------------------------- 1 | import mongoengine 2 | from pymongo import MongoClient 3 | 4 | from automix import config 5 | from automix.model.inputOutput.serializer.serializer import Serializer 6 | 7 | 8 | class DBSerializer(Serializer): 9 | """ 10 | class handling jams serialization of track or mixes 11 | """ 12 | 13 | def __init__(self): 14 | client = MongoClient() 15 | self.db = client["1001tracklists"] 16 | self.mixes = self.db.mixes 17 | # TODO singleton? 
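# Usage sketch, mirroring scrapOverview() in oneThousandOnetracklistsProxy (the mix dict and url are illustrative):
#   db = DBSerializer()
#   if not db.exist({"_id": url}):
#       db.insert(mix, url)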
18 | 19 | def insert(self, mix, id): 20 | mix["_id"] = id 21 | self.mixes.insert_one(mix) 22 | 23 | def retrieive(self, match): 24 | return self.mixes.find(match) 25 | 26 | def delete(self, match): 27 | self.mixes.remove(match) 28 | 29 | def exist(self, match): 30 | result = self.retrieive(match) 31 | return result.count() 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /automix/model/inputOutput/serializer/featureSerializer.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | class FeatureSerializer(object): 5 | """ 6 | Serialize and deserialize features from a path 7 | """ 8 | 9 | @staticmethod 10 | def deserialize(path): 11 | """ 12 | Returns a dictionary containing the json content of the file 13 | """ 14 | serializedFeatures = {} 15 | try: 16 | with open(path) as file: 17 | serializedFeatures = decode(file.read()) 18 | except Exception: 19 | return None 20 | 21 | return serializedFeatures 22 | 23 | @staticmethod 24 | def serialize(path, features): 25 | """ 26 | Write the features in JSON 27 | Features have to be/contain a dict, list, primitive, or an object implementing jsonEncode(void):any 28 | """ 29 | with open(path, 'w') as featuresFile: 30 | featuresFile.write(json.dumps(features, cls=MyJSONEncoder)) 31 | 32 | 33 | class MyJSONEncoder(json.JSONEncoder): 34 | """ 35 | My own version of json.JSONEncoder 36 | This implementation takes care of objects implementing a jsonEncode method 37 | """ 38 | 39 | def default(self, obj): # pylint: disable=E0202 40 | try: 41 | result = obj.jsonEncode() #TODO: make it explicit that the objects need to implement jsonEncode 42 | if isinstance(result, dict): 43 | result["type"] = str(type(obj)) 44 | return result 45 | except Exception: 46 | pass 47 | 48 | try: 49 | return obj.tolist() 50 | except Exception: 51 | pass 52 | 53 | return json.JSONEncoder.default(self, obj) 54 | 55 | 56 | def decode(stringObject): 57 | jsonObject = json.loads(stringObject) 58 | return recursiveMap(jsonObject) 59 | 60 | 61 | def recursiveMap(obj): 62 | """ 63 | recursively map all the fields of the JSON-decoded object to classes from the model 64 | """ 65 | try: 66 | from automix.model.classes.signal import Signal, SparseSignal, SparseSegmentSignal 67 | if isinstance(obj, dict): 68 | if u'type' in obj and (obj[u"type"] == str(Signal) or obj[u"type"] == str(SparseSignal)): 69 | obj = Signal.jsonDeserialize(obj) 70 | elif u'type' in obj and obj[u"type"] == str(SparseSegmentSignal): 71 | obj = SparseSegmentSignal.jsonDeserialize(obj) 72 | else: 73 | for key, value in obj.items(): 74 | obj[key] = recursiveMap(obj[key]) 75 | elif isinstance(obj, list): 76 | for key, value in enumerate(obj): 77 | obj[key] = recursiveMap(obj[key]) 78 | except Exception: 79 | pass 80 | 81 | return obj 82 | -------------------------------------------------------------------------------- /automix/model/inputOutput/serializer/graphvizSerializer.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import os 3 | from typing import List 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | from graphviz import Digraph 8 | 9 | from automix.featureExtraction import Estimator 10 | from automix.model.classes import Signal 11 | from automix.model.inputOutput.serializer.serializer import Serializer 12 | 13 | 14 | class GraphvizSerializer(Serializer): 15 | """ 16 | class rendering the estimators' computation graph (inputs, outputs and intermediate features) with Graphviz 17 | """ 18 | 19 | def
addslashes(self, s): 20 | forbiddenCharacter = ["\\", "\0", "'", '"'] 21 | for i in forbiddenCharacter: 22 | if i in s: 23 | s = s.replace(i, "") 24 | return s 25 | 26 | def serializeEstimators(self, graph: List[Estimator], filename="computation_graph.pdf", features=None): 27 | 28 | s = Digraph('structs', node_attr={'shape': 'plaintext'}, filename=filename) 29 | # Add nodes with inputs and outputs 30 | tmpFiles = [] 31 | for i, estimator in enumerate(graph): 32 | # s.attr('node', fillcolor='yellow', style='filled') 33 | inputs = "" + "".join(["" + el + "" for el in np.hstack(estimator.inputs)]) + "" 34 | name = "" + self.addslashes(str(estimator)) + "" 36 | outputs = "" + "".join(["" + el + "" for el in estimator.outputs]) + "" 37 | 38 | # Save temporary images to put on the graph 39 | imageFile = None 40 | if features: 41 | imageFile = str(i) + "tmp.png" 42 | tmpFiles.append(imageFile) 43 | figure = plt.figure() 44 | for output in [o for o in estimator.outputs if o in features]: 45 | if type(features[output]) == Signal: 46 | features[output].plot(label=output) 47 | if len(estimator.outputs) > 1: 48 | plt.legend() 49 | figure.savefig(imageFile) 50 | plt.close(figure) 51 | 52 | s.node(str(i), 53 | label='<' + inputs + name + outputs + '
>', 54 | image=imageFile, 55 | imagepos='ml', 56 | imagescale='true') 57 | 58 | # Add edges 59 | edges = [] 60 | for i, source in enumerate(graph): 61 | for j, target in enumerate(graph[i + 1:]): 62 | j = j + i + 1 63 | for output, input in itertools.product(source.outputs, np.hstack(target.inputs)): 64 | if input == output: 65 | edges.append((str(i) + ":" + input, str(j) + ":" + output)) 66 | s.edges(edges) 67 | s.view() 68 | 69 | for f in tmpFiles: 70 | os.remove(f) 71 | -------------------------------------------------------------------------------- /automix/model/inputOutput/serializer/serializer.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | class Serializer(object): 4 | """ 5 | Abstract class for serializers 6 | """ 7 | 8 | # def serializeTrack(self, path, track: Track): 9 | # """ 10 | # Base method to serialize a mix 11 | # """ 12 | # raise NotImplementedError() 13 | 14 | # def serializeMix(self, path: str, decks: List[Deck], BPM=120, markers: List = [], subfolder: str = ""): 15 | # """ 16 | # Base method to serialize a mix 17 | # """ 18 | # raise NotImplementedError() 19 | 20 | # @staticmethod 21 | # def deserializeTrack(path: str) -> Track: 22 | # """ 23 | # Base static method to deserialize a track "settings" 24 | # """ 25 | # raise NotImplementedError() 26 | 27 | # @staticmethod 28 | # def loadFolder(path: str): 29 | # """ 30 | # Base method to deserialize a folder 31 | # """ 32 | # result = [] 33 | # for root, dirs, files in os.walk(path): 34 | # for file in files: 35 | # result.append(deserializeTrack) 36 | -------------------------------------------------------------------------------- /automix/model/inputOutput/serializer/sonification.py: -------------------------------------------------------------------------------- 1 | """ 2 | contains wave export with a possible click to sonify events in the tracks such as beats 3 | """ 4 | import numpy as np 5 | from scipy.io import wavfile 6 | import mir_eval 7 | 8 | from automix.model.classes.track import Track 9 | 10 | def sonifyClicks(ticks, audioHQ, sr, outputPath=None): 11 | """ 12 | Put a click at each estimated beat in beats array 13 | todo: look at mir_eval which is used by msaf 14 | 15 | ticks can either be [time] or [[time,barPosition]] 16 | """ 17 | # audioHQ, sr = Track.readFile(inputPath) 18 | # msaf.utils.sonify_clicks(audio_hq, np.array(tick), outputPath, sr) 19 | 20 | # Create array to store the audio plus the clicks 21 | # outAudio = np.zeros(len(audioHQ) + 100) 22 | 23 | # Assign the audio and the clicks 24 | outAudio = audioHQ 25 | if isinstance(ticks[0], list): 26 | audioClicks = getClick( 27 | [tick[0] for tick in ticks if tick[1] != 1], 28 | sr, 29 | frequency=1500, 30 | volume=0.8, 31 | length=len(outAudio)) 32 | outAudio[:len(audioClicks)] += audioClicks 33 | 34 | audioClicks2 = getClick( 35 | [tick[0] for tick in ticks if tick[1] == 1], 36 | sr, 37 | frequency=1000, 38 | volume=1, 39 | length=len(outAudio)) 40 | outAudio[:len(audioClicks2)] += audioClicks2 41 | else: 42 | audioClicks = mir_eval.sonify.clicks(ticks, sr) #getClick(ticks, sr, frequency=1500, length=len(outAudio)) 43 | outAudio[:len(audioClicks)] += audioClicks 44 | 45 | # Write to file 46 | if outputPath: 47 | wavfile.write(outputPath, sr, outAudio) 48 | return outAudio 49 | 50 | 51 | def getClick(clicks, fs, frequency=1000, offset=0, volume=1, length=0): 52 | """ 53 | Generate clicks (this should be done by mir_eval, but its 54 | latest release is not compatible with latest 
numpy) 55 | """ 56 | times = np.array(clicks) + offset 57 | 58 | # 1 kHz tone, 100ms with Exponential decay 59 | click = np.sin(2 * np.pi * np.arange(fs * .1) * frequency / 60 | (1. * fs)) * volume 61 | click *= np.exp(-np.arange(fs * .1) / (fs * .01)) 62 | 63 | if not length: 64 | length = int(times.max() * fs + click.shape[0] + 1) 65 | 66 | return mir_eval.sonify.clicks(times, fs, click=click, length=length) -------------------------------------------------------------------------------- /automix/model/inputOutput/serializer/traktorSerializer.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | 3 | from automix.model.classes import Signal 4 | from automix.model.classes.track import Track 5 | 6 | class TraktorSerializer(object): 7 | @staticmethod 8 | def tracktorDeserialize(path, titles=None): 9 | """ 10 | get a track from the xml format from tracktor (1?) 11 | """ 12 | tree = ET.parse(path) 13 | root = tree.getroot() 14 | tracks = {} 15 | for entry in root.find("COLLECTION").iter("ENTRY"): 16 | track = Track() 17 | track.name = entry.attrib["TITLE"] 18 | track.path = entry.find("LOCATION").attrib["FILE"][:-4] #Removing .mp3 19 | cues = [cue for cue in entry.iter("CUE_V2") if cue.attrib["NAME"] != "AutoGrid"] 20 | track.features["Cues"] = Signal([cue.attrib["NAME"][:7] for cue in cues], 21 | times=[float(cue.attrib["START"]) / 1000 for cue in cues], 22 | sparse=True) 23 | tracks[track.path] = track 24 | if titles: 25 | for t in titles: 26 | if t in tracks: 27 | yield tracks[t] 28 | else: 29 | print(t, "not in collection") 30 | dummytrack = Track() 31 | dummytrack.features["Cues"] = Signal(times=[]) 32 | yield dummytrack 33 | # return [tracks[t] if t in tracks else Track() for t in titles] 34 | else: 35 | return tracks.values() 36 | 37 | 38 | # bla = TraktorSerializer.tracktorDeserialize( 39 | # "/home/mickael/Documents/programming/dj-tracks-switch-points/evaluation/mixed in key/collection.nml") 40 | # print(bla) -------------------------------------------------------------------------------- /automix/model/inputOutput/serializer/veireerializer.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | 3 | from automix.model.classes import Signal 4 | from automix.model.classes.track import Track 5 | 6 | class TraktorSerializer(object): 7 | @staticmethod 8 | def tracktorDeserialize(path, titles=None): 9 | """ 10 | get a track from the xml format from tracktor (1?) 
11 | """ 12 | tree = ET.parse(path) 13 | root = tree.getroot() 14 | tracks = {} 15 | for entry in root.find("COLLECTION").iter("ENTRY"): 16 | track = Track() 17 | track.name = entry.attrib["TITLE"] 18 | track.path = entry.find("LOCATION").attrib["FILE"][:-4] #Removing .mp3 19 | cues = [cue for cue in entry.iter("CUE_V2") if cue.attrib["NAME"] != "AutoGrid"] 20 | track.features["Cues"] = Signal([cue.attrib["NAME"][:7] for cue in cues], 21 | times=[float(cue.attrib["START"]) / 1000 for cue in cues], 22 | sparse=True) 23 | tracks[track.path] = track 24 | if titles: 25 | return [tracks[t] if t in tracks else None for t in titles] 26 | return tracks.values() 27 | 28 | 29 | # bla = TraktorSerializer.tracktorDeserialize( 30 | # "/home/mickael/Documents/programming/dj-tracks-switch-points/evaluation/mixed in key/collection.nml") 31 | # print(bla) -------------------------------------------------------------------------------- /automix/model/inputOutput/serializer/xmlSerializer.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | 3 | from automix.model.classes.segment import Segment 4 | from automix.model.classes.track import Track 5 | 6 | 7 | class XmlSerialiser(object): 8 | @staticmethod 9 | def xmlDeserialize(path): 10 | """ 11 | get a track from the SegmXML format: http://www.ifs.tuwien.ac.at/mir/audiosegmentation.html 12 | """ 13 | tree = ET.parse(path) 14 | root = tree.getroot() 15 | track = Track() 16 | 17 | track.name = root.find('metadata').find('song').find('title').text 18 | track.segments = [ 19 | Segment(segment.attrib['label'], start=segment.attrib['start_sec'], end=segment.attrib['end_sec']) 20 | for segment in root.find('segmentation').iter('segment') 21 | ] 22 | return track 23 | -------------------------------------------------------------------------------- /automix/model/inputOutput/template/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MZehren/Automix/dfaa00a9e7c5c0938c0a9d275c07f3a3e5f87e43/automix/model/inputOutput/template/__init__.py -------------------------------------------------------------------------------- /automix/model/inputOutput/template/item.template.RPP: -------------------------------------------------------------------------------- 1 | 22 | [STRETCHMARKER] 23 | > -------------------------------------------------------------------------------- /automix/model/inputOutput/template/project.template.RPP: -------------------------------------------------------------------------------- 1 | 28 | 30 | RENDER_FILE "" 31 | RENDER_PATTERN "" 32 | RENDER_FMT 0 2 0 33 | RENDER_1X 0 34 | RENDER_RANGE 1 0 0 18 1000 35 | RENDER_RESAMPLE 3 0 1 36 | RENDER_ADDTOPROJ 0 37 | RENDER_STEMS 0 38 | RENDER_DITHER 0 39 | TIMELOCKMODE 1 40 | TEMPOENVLOCKMODE 1 41 | ITEMMIX 0 42 | DEFPITCHMODE 589824 43 | TAKELANE 1 44 | SAMPLERATE 44100 0 0 45 | 47 | LOCK 1 48 | 55 | GLOBAL_AUTO -1 56 | TEMPO [BPM] 4 4 57 | PLAYRATE 1 0 0.25 4 58 | SELECTION 0 0 59 | SELECTION2 0 0 60 | MASTERAUTOMODE 0 61 | MASTERTRACKHEIGHT 0 62 | MASTERPEAKCOL 16576 63 | MASTERMUTESOLO 0 64 | MASTERTRACKVIEW 0 0.6667 0.5 0.5 0 0 0 65 | MASTERHWOUT 0 0 1 0 0 0 0 -1 66 | MASTER_NCH 2 2 67 | MASTER_VOLUME 1 0 -1 -1 1 68 | MASTER_FX 1 69 | MASTER_SEL 0 70 | 77 | 85 | [MARKERS] 86 | 88 | [TRACKS] 89 | > -------------------------------------------------------------------------------- /automix/model/inputOutput/template/track.template.RPP: -------------------------------------------------------------------------------- 1 | 32 | 40 | 52 | FLOATPOS 0 0 0 0 53 | FXID {[3BandEQID]} 54 | 62 | 70 | 78 | WAK 0 79 | BYPASS 0 0 0 80 |
83 | FLOATPOS 0 0 0 0 84 | FXID {[HPFLPFID]} 85 | 93 | 101 | WAK 0 102 | > 103 | [ITEMS] 104 | > -------------------------------------------------------------------------------- /automix/rules/__init__.py: -------------------------------------------------------------------------------- 1 | # from https://www.youtube.com/watch?v=RvvitRlYClU 2 | # Loops of (4-8-16) beats should always be in sync. If the bass line of the first track stops, the second one should begin 3 | # if there are two bass lines, remove one with the EQ 4 | # if the loop is 4 beats long, just repeat it 4 times. 5 | # it's ok if two tracks are not in phase as long as they share the same tempo 6 | 7 | # From Paolo: 8 | # If there is a lot going on in the music (i.e. anything which is not kicks) we should not fade it slowly but 9 | 10 | # From me: 11 | # We should not overlap a ternary piece with a binary one 12 | # we should compute the correlation of the overlapping parts. the better the correlation, the better the transition 13 | 14 | # From https://www.scientificamerican.com/article/psychology-workout-music/ 15 | # cadence in the lyrics is very important to give the energy of a piece of music. 16 | # tempos: 120bpm is the most common. 160-180bpm if running on a treadmill. 145bpm is the ceiling though 17 | 18 | # From https://www.pyramind.com/training/online/dj101-will-marshall-3-6 19 | # EQ 20 | # try not to add a boost if you can avoid it because it distorts the audio (but you could remove the other bands and turn up the gain) 21 | # you can slowly incorporate the highs, but depending on the context, you may not have to 22 | # you want to swap the bass 23 | # The style of the EQ depends on the style of the track 24 | 25 | import numpy as np 26 | import sys 27 | 28 | from automix.rules.harmonicTransitionRule import HarmonicTransitionRule 29 | from automix.rules.percussiveTransitionRule import PercussiveTransitionRule 30 | from automix.rules.activityRule import ActivityRule 31 | from automix.rules.eventsAlignmentRule import EventsAlignmentRule 32 | from automix.rules.suitableKeyRule import SuitableKeyRule 33 | from automix.rules.veireTransitionsRule import VeireTransitionRule 34 | 35 | def runAll(mix, boundaries, switches): 36 | """ 37 | run all the rules for this mix. 38 | Return the weighted average score as well as a string describing the individual scores for debugging 39 | 40 | Parameters: 41 | mix: an array of Decks containing tracks as well as effects 42 | boundaries: a tuple indicating the boundaries in seconds where the rules should be applied. if None, the whole mix is going to be analyzed 43 | """ 44 | if boundaries is None: 45 | boundaries = [0, sys.maxsize] # TODO: lookup the real boundaries 46 | 47 | rules = [ 48 | VeireTransitionRule() 49 | # Veire: transitions types (relaxed, rolling, double drop), vocal clash detection, onset detection matching 50 | # EventsAlignmentRule(), ActivityRule(weight=2) 51 | ] # MaxPlayrateChangeRule(), BeatOverlapPrecisionRule(), SuitableKeyRule(), HarmonicTransitionRule(), PercussiveTransitionRule() 52 | average = 0.
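# weighted mean of the rule scores; rules returning NaN are skipped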
53 | ruleComputed = 0 54 | logs = [] 55 | for rule in rules: 56 | result = rule.run(mix, boundaries, switches) 57 | logs.append((str(rule), result)) # logging.info(str(rule), result) 58 | if not np.isnan(result): 59 | average += result * rule.weight 60 | ruleComputed += rule.weight 61 | if ruleComputed: 62 | return average / ruleComputed, logs 63 | else: 64 | return 0, logs 65 | -------------------------------------------------------------------------------- /automix/rules/activityRule.py: -------------------------------------------------------------------------------- 1 | from automix.rules.rule import Rule 2 | from automix.utils.quantization import separateInEvenGrids 3 | from automix.model.classes.signal import Signal 4 | 5 | 6 | class ActivityRule(Rule): 7 | """ 8 | some tracks contain empty segments (such as only vocal tracks). Make sure that those segments are actually overlaped with sound 9 | """ 10 | 11 | # def runOne(self, trackA, trackB, boundaries): 12 | # for track in [trackA, trackB]: 13 | # volume = track.features["barMSE"].getValues(*boundaries) 14 | 15 | def run(self, mix, boundaries): 16 | tracks = Rule.getTracks(mix, boundaries) 17 | noiseThreshold = 0.1 18 | silenceRatio = 0.1 19 | 20 | masterSignal = Signal([], times=[]) 21 | for track in tracks: 22 | postFXSignal = track.applyEffects(track.getFeature("barMSE")) 23 | postFXSignal.times = track.getDeckTime(postFXSignal.times) 24 | masterSignal.addSignal(postFXSignal) 25 | 26 | values = masterSignal.getValues(*boundaries) 27 | proportion = float(len([value for value in values if value < noiseThreshold])) / len(values) 28 | if proportion > silenceRatio: 29 | return 1-proportion 30 | else: 31 | return 1 32 | 33 | def __str__(self): 34 | return "Minimum Activity" 35 | -------------------------------------------------------------------------------- /automix/rules/eventsAlignmentRule.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from automix.rules.rule import Rule 4 | from automix.utils import quantization 5 | 6 | 7 | class EventsAlignmentRule(Rule): 8 | """ 9 | The structure of the tracks should be aligned 10 | TODO: change to the loop instead of the segments ? 11 | """ 12 | 13 | def __init__(self, event="boundaries"): 14 | raise DeprecationWarning() 15 | self.event = event 16 | super(EventsAlignmentRule, self).__init__() 17 | 18 | def getEvent(self, track): 19 | if self.event == "boundaries": 20 | return track.features["boundaries"] 21 | if self.event == "kick": 22 | return [ 23 | float(onset[0]) for onset in track.adtOnsets if onset[1] == "0" 24 | ] 25 | if self.event == "beat": 26 | return track.getBeats() 27 | 28 | def run(self, mix, boundaries): 29 | tracks = Rule.getTracks(mix, boundaries) 30 | # set threshold of deviations 31 | # 20Hz, lowest hearable frequency. below 50ms between two notes, we (should) hear only one note 32 | # if the deviation is above an eighth note it's a different beat, thus it's ok. 
33 | minThreshold = 0.05 34 | # deckTempo = max([track.features["tempo"] * track.playRate for track in tracks]) 35 | 36 | # compute the deck's location of each event 37 | # we also remove events outside of the overlaping areas 38 | # We still need to align before and after the boundaries of each tracks because we feel the structure/beat in long period of time 39 | 40 | # returns: beforeOverlapA, startOverlapA, endTrackA, afterEndTrackA, startTrackB, endOverlapB, afterOverlapB 41 | # localTimes = [ 42 | # Rule.getTransitionLocalTimes( 43 | # tracks[i], tracks[i + 1], windowInBeats=window) 44 | # for i in range(len(tracks) - 1) 45 | # ] 46 | localTimes = Rule.getTransitionsLocalTimes(tracks) 47 | 48 | overlapsEvents = [([ 49 | tracks[i].getDeckTime(event, enableExceeding=False) 50 | for event in self.getEvent(tracks[i]) 51 | if event > localTimes[i][0] and event < localTimes[i][3] 52 | ], [ 53 | tracks[i + 1].getDeckTime(event, enableExceeding=False) 54 | for event in self.getEvent(tracks[i + 1]) 55 | if event > localTimes[i][4] and event < localTimes[i][6] 56 | ]) for i in range(len(tracks) - 1)] 57 | 58 | # compute the distance between events for each overlaps 59 | overlapErrors = [ 60 | np.abs(quantization.diff(trackAEvents, 61 | trackBEvents, maxThreshold=10000)) 62 | for trackAEvents, trackBEvents in overlapsEvents 63 | ] 64 | 65 | # if no segments can be aligned for one transition, it's 0 66 | if not len([overlap for overlap in overlapErrors if len(overlap)]): 67 | return 0 68 | 69 | # add a 1 or a 0 for each event which should overlap (distance < maxDistance) if the difference is perceptible (distance > minDistance) 70 | result = np.min([ 71 | np.mean([ 72 | 1 if distance < minThreshold else 0 for distance in distances 73 | ]) for distances in overlapErrors 74 | ]) 75 | 76 | return result 77 | 78 | def __str__(self): 79 | return self.event + "overlap" 80 | -------------------------------------------------------------------------------- /automix/rules/harmonicTransitionRule.py: -------------------------------------------------------------------------------- 1 | from automix.rules.rule import Rule 2 | from automix.rules.transitionRule import TransitionRule 3 | 4 | 5 | class HarmonicTransitionRule(TransitionRule): 6 | """ 7 | Rule rating the transition between two tracks high if and only if at maximum 8 | one of the transitioning parts is harmonic. 
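A part counts as harmonic when the track's getHarmonic() feature is non-zero somewhere in the overlapped section.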
9 | """ 10 | 11 | @staticmethod 12 | def score(trackA, trackB): 13 | _, startOverlapA, endTrackA, _, startTrackB, endOverlapB, _ \ 14 | = Rule.getTransitionLocalTimes(trackA, trackB) 15 | 16 | def hasHarmonicTransition(track, start, end): 17 | values = track.getHarmonic().getValues(start, end) 18 | if not values: 19 | return False 20 | return max(values) 21 | 22 | return float(not hasHarmonicTransition(trackA, startOverlapA, endTrackA) 23 | or not hasHarmonicTransition(trackB, startTrackB, endOverlapB)) 24 | -------------------------------------------------------------------------------- /automix/rules/lengthRule.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from automix.rules.rule import Rule 4 | 5 | 6 | class LengthRule(Rule): 7 | """ 8 | The mixed track has to be played for most of its duration, otherwise it will not be good 9 | """ 10 | 11 | def run(self, mix): 12 | tracks = Rule.getTracks(mix) 13 | return np.mean([ 14 | track.length / (track.duration / track.playRate) 15 | for track in tracks 16 | ]) 17 | 18 | def __str__(self): 19 | return "Majority played" 20 | -------------------------------------------------------------------------------- /automix/rules/maxPlayrateChangeRule.py: -------------------------------------------------------------------------------- 1 | from automix.rules.rule import Rule 2 | 3 | class MaxPlayrateChangeRule(Rule): 4 | """ 5 | if the playRate is changed by more than 20%, it is going to sound bad 6 | returns 0 if one of the tracks' playrate is changed by 20% or more 7 | returns 1 if the tracks' playrate is changed by at most 5% 8 | """ 9 | 10 | def __init__(self, maxPlayRateChange=0.2, minPlayRateChange=0.05): 11 | self.maxPlayRateChange = maxPlayRateChange 12 | self.minPlayRateChange = minPlayRateChange 13 | super(MaxPlayrateChangeRule, self).__init__() 14 | 15 | def run(self, mix, boundaries): 16 | tracks = Rule.getTracks(mix) 17 | change = max([abs(track.playRate - 1) for track in tracks]) 18 | score = 1 if change < self.minPlayRateChange else 1 - \ 19 | (change-self.minPlayRateChange)/(self.maxPlayRateChange-self.minPlayRateChange) 20 | return max(score, 0) 21 | 22 | def __str__(self): 23 | return "PlayRate" 24 | -------------------------------------------------------------------------------- /automix/rules/percussiveTransitionRule.py: -------------------------------------------------------------------------------- 1 | from automix.rules.rule import Rule 2 | from automix.rules.transitionRule import TransitionRule 3 | 4 | 5 | class PercussiveTransitionRule(TransitionRule): 6 | """ 7 | Rule rating the transition between two tracks high if and only if at maximum 8 | one of the transitioning parts is purely percussive.
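A part counts as purely percussive when its getPercussive() feature is non-zero while its getHarmonic() feature stays at zero over the overlapped section.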
9 | """ 10 | 11 | @staticmethod 12 | def score(trackA, trackB): 13 | _, startOverlapA, endTrackA, _, startTrackB, endOverlapB, _ \ 14 | = Rule.getTransitionLocalTimes(trackA, trackB) 15 | 16 | def hasPurelyPercussiveTransition(track, start, end): 17 | harmonic = track.getHarmonic().getValues(start, end) 18 | if not harmonic or max(harmonic): 19 | return False 20 | return max(track.getPercussive().getValues(start, end)) 21 | 22 | return float( 23 | not hasPurelyPercussiveTransition(trackA, startOverlapA, endTrackA) 24 | or not hasPurelyPercussiveTransition(trackB, startTrackB, 25 | endOverlapB)) 26 | -------------------------------------------------------------------------------- /automix/rules/rule.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from automix.model.classes.track import Track 3 | 4 | class Rule(object): 5 | """ 6 | abstract class which holds a rule 7 | """ 8 | 9 | def __init__(self, weight=1): 10 | self.weight = weight 11 | self.description = self.__class__.__name__ 12 | 13 | def __str__(self): 14 | return self.description 15 | 16 | def run(self, mix, **kwarg): 17 | """ 18 | return a value between 0 and 1 telling how well the rule is applied in the transition 19 | 20 | Args: 21 | mix (Deck[]): all the information from the data model. 22 | A mix is a collection of Decks containing Tracks at specific positions and effects 23 | kwarg: 24 | - Boundaries 25 | - switches 26 | """ 27 | raise NotImplementedError("To be implemented") 28 | 29 | @staticmethod 30 | def getTransitionLocalTimes(trackA, trackB, windowInBeats=16): 31 | """ 32 | returns Specific times from the overlapings between two tracks 33 | the window is time before and after the POIs 34 | """ 35 | windowInSeconds = windowInBeats * 60 / \ 36 | trackA.features["tempo"] / trackA.playRate 37 | # times in the track timeline 38 | beforeOverlapA = trackA.getTrackTime(trackB.position - windowInSeconds) 39 | startOverlapA = trackA.getTrackTime(trackB.position) 40 | endTrackA = trackA.getTrackTime(trackA.length + trackA.position) 41 | afterEndTrackA = trackA.getTrackTime(trackA.length + trackA.position + 42 | windowInSeconds) 43 | startTrackB = trackB.soffs 44 | endOverlapB = trackB.getTrackTime(trackA.getDeckTime(endTrackA)) 45 | afterOverlapB = trackB.getTrackTime( 46 | trackA.getDeckTime(endTrackA) + windowInSeconds) 47 | 48 | return beforeOverlapA, startOverlapA, endTrackA, afterEndTrackA, startTrackB, endOverlapB, afterOverlapB 49 | 50 | @staticmethod 51 | def getTransitionsLocalTimes(tracks): 52 | """ 53 | sort the tracks and returns Specific times from the overlaps in the tracks provided 54 | """ 55 | # tracks = Rule.getTracks(mix, boundaries) 56 | tracks.sort(key=lambda track: track.position) 57 | return [Rule.getTransitionLocalTimes(tracks[i], tracks[i+1]) for i in range(0, len(tracks)-1, 2)] 58 | 59 | @staticmethod 60 | def getTracks(mix, boundaries) -> List[Track]: 61 | """ 62 | return the unordered tracks from the mix 63 | """ 64 | return [track for deck in mix for track in deck.tracks if boundaries[0] < track.getEndPosition() and boundaries[1] > track.position] 65 | -------------------------------------------------------------------------------- /automix/rules/suitableKeyRule.py: -------------------------------------------------------------------------------- 1 | from numpy import mean 2 | 3 | from automix.featureExtraction.key import edmkeyProxy as edm 4 | from automix.rules.transitionRule import TransitionRule 5 | 6 | 7 | class 
SuitableKeyRule(TransitionRule): 8 | """ 9 | Rule rating the transition between two tracks based on how well their keys 10 | fit according to the mirex score. 11 | """ 12 | 13 | @staticmethod 14 | def score(trackA, trackB): 15 | convertedKeyA = edm.convertKey(trackA.getKey()) 16 | convertedKeyB = edm.convertKey(trackB.getKey()) 17 | return edm.mirexScore(convertedKeyA, convertedKeyB) -------------------------------------------------------------------------------- /automix/rules/transitionRule.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from automix.rules.rule import Rule 4 | 5 | 6 | class TransitionRule(Rule): 7 | """ 8 | Abstract superclass for all rules regarding the pair-wise transition 9 | between songs of a mix. 10 | """ 11 | 12 | @staticmethod 13 | def score(trackA, trackB): 14 | """Calculates the score of two transitioning tracks. To be implemented 15 | by any subclass. 16 | 17 | Args: 18 | trackA (Track): The track that transitions into the second track. 19 | trackB (Track): The track into that the first track transitions. 20 | 21 | Returns: 22 | float: A score in [0, 1] grading the transition. 23 | 24 | """ 25 | 26 | raise NotImplementedError("To be implemented") 27 | 28 | def run(self, mix, boundaries): 29 | """Calculates the score of the mix as the mean of the scores for all 30 | transitions between tracks. Overrides the method of the superclass. 31 | """ 32 | 33 | # TODO update description? 34 | 35 | # sort out, which tracks of the mix transition into which tracks 36 | tracks = self.getTracks(mix, boundaries) 37 | tracks.sort(key=lambda x: x.getStartPosition()) 38 | scores = [] 39 | for i in range(len(tracks) - 1): 40 | trackA = tracks[i] 41 | for j in range(i + 1, len(tracks)): 42 | trackB = tracks[j] 43 | if trackA.getEndPosition() <= trackB.getStartPosition(): 44 | break 45 | # trackA transitions into trackB 46 | # compute score 47 | scores.append(self.score(trackA, trackB)) 48 | 49 | if not scores: 50 | # no transitions detected. 
return best score 51 | return 1.0 52 | 53 | return np.mean(scores) -------------------------------------------------------------------------------- /automix/rules/veireTransitionsRule.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from automix.rules.rule import Rule 4 | from automix.featureExtraction.lowLevel import CoreFinder 5 | from automix.model.classes.signal import Signal 6 | 7 | 8 | class VeireTransitionRule(Rule): 9 | """ 10 | The transition occurs on specific locations in the mix: 11 | double drop: on each track the transition is on a down to up 12 | 13 | A\B dd - du - ud - uu 14 | dd 15 | du x 16 | ud x** 17 | uu x* 18 | 19 | * the track A needs a ud 16 bars after the uu 20 | ** the track B needs to start 16 bars before the cue 21 | """ 22 | 23 | def run(self, mix, boundaries, switches): 24 | tracks = Rule.getTracks(mix, boundaries) 25 | # cf = CoreFinder(parameterIncludeBorders=False) 26 | # for A, B, switchPositions in [(tracks[i], tracks[i + 1], switches[i]) for i in range(len(switches))]: 27 | # start, switch, stop = switchPositions 28 | 29 | # # compute if it's a core in all the segments (start-switch and switch-stop) are full segments or not 30 | # # TODO don't call the predict one, it's too slow 31 | # aCore, bCore = [ 32 | # cf.predictOne(track.features["samples"], 33 | # Signal(1, times=[track.getTrackTime(start), 34 | # track.getTrackTime(switch), 35 | # track.getTrackTime(stop)]))[0] for track in [A, B] 36 | # ] 37 | 38 | # isDoubleDrop = not aCore[0] and aCore[1] and not bCore[0] and bCore[1] 39 | # isRolling = aCore[0] and aCore[1] and not bCore[0] and bCore[1] # TODO: implement the aCore[2] == False 40 | # isRelaxed = aCore[0] and not aCore[1] and not bCore[0] and not aCore[1] #TODO: implement the aCore[0] == start of the track 41 | # if isDoubleDrop or isRolling or isRelaxed: 42 | # return 1 43 | # return 0 44 | if len(tracks) < 2: 45 | return 0 46 | scores = [] 47 | for A, B, switchPositions in [(tracks[i], tracks[i + 1], switches[i]) for i in range(len(switches))]: 48 | start, switch, stop = switchPositions 49 | coreStartA, coreSwitchA, coreStopA = [ 50 | A.features["core"].getValue(A.getTrackTime(time), toleranceWindow=0.1) for time in switchPositions 51 | ] 52 | coreStartB, coreSwitchB, coreStopB = [ 53 | B.features["core"].getValue(B.getTrackTime(time), toleranceWindow=0.1) for time in switchPositions 54 | ] 55 | 56 | isDoubleDrop = not coreStartA and not coreStartB and coreSwitchA and coreSwitchB 57 | 58 | isRolling = coreStartA and coreSwitchA and not coreStopA and not coreStartB and coreSwitchB and coreStopB 59 | 60 | isRelaxed = coreStartA and not coreSwitchA and not coreStopA and not coreStartB and not coreSwitchB #TODO and start of the song here 61 | if isDoubleDrop: 62 | self.description = "Double drop" 63 | scores.append(1) 64 | elif isRolling: 65 | self.description = "Rolling" 66 | scores.append(1) 67 | elif isRelaxed: 68 | self.description = "Relaxed" 69 | scores.append(1) 70 | else: 71 | scores.append(0) 72 | return np.mean(scores) 73 | 74 | -------------------------------------------------------------------------------- /automix/tests/testFeatureExtraction.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from scipy.stats import hmean 5 | 6 | from automix import config 7 | from automix.featureExtraction.lowLevel import (CoreFinder, PeakPicking, PeakSelection, Windowing) 8 | from 
automix.featureExtraction.structure.eval import evalCuesMutliple 9 | from automix.model.classes import (DenseSignal, Signal, SparseSegmentSignal, SparseSignal, Track) 10 | from automix.model.inputOutput.serializer import JamsSerializer 11 | 12 | 13 | class TestFeatureExtraction(unittest.TestCase): 14 | def testPeakPicking(self): 15 | # Basic test 16 | myInput = Signal([1.5, 1, 0, 0, 0, 2, 0], sampleRate=1) 17 | pp = PeakPicking(parameterMinDistance=2, parameterRelativeThreshold=0.2) 18 | result = pp.predictOne(myInput)[0] 19 | self.assertEqual(list(result.values), [1.5, 2.]) 20 | self.assertEqual(list(result.times), [0, 5]) 21 | 22 | def testPeakSelection(self): 23 | # Basic test 24 | myInput = [Signal(1, times=[0, 5, 10]), Signal(0.5, times=[0, 10])] 25 | ps = PeakSelection() 26 | result = ps.predictOne(myInput, None, None)[0] 27 | self.assertEqual(list(result.values), [1.5, 1, 1.5]) 28 | self.assertEqual(list(result.times), [0, 5, 10]) 29 | 30 | # Test with mean 31 | ps = PeakSelection(parameterMergeFunction=np.mean) 32 | result = ps.predictOne(myInput, None, None)[0] 33 | self.assertEqual(list(result.values), [0.75, 1, 0.75]) 34 | self.assertEqual(list(result.times), [0, 5, 10]) 35 | 36 | def testStructureEval(self): 37 | # basic tests 38 | result = evalCuesMutliple([Signal(1, times=[1, 3, 2]), Signal(1, times=[1, 2, 3])], 39 | [Signal(1, times=[1, 2, 3]), Signal(1, times=[1, 2])]) 40 | self.assertEqual(result["recall"], 1) 41 | self.assertEqual(result["precision"], 5 / 6) 42 | self.assertEqual(result["fMeasure"], hmean([1, 5 / 6])) 43 | 44 | result = evalCuesMutliple([Signal(1, times=[1, 2, 4, 5, 6])], [Signal(1, times=[1, 2, 3, 4])], limitSearchSpace=True) 45 | self.assertEqual(result["recall"], 3 / 4) 46 | self.assertEqual(result["precision"], 3 / 3) 47 | 48 | result = evalCuesMutliple([Signal(1, times=[1, 2, 5, 6]), Signal(1, times=[1, 2, 3, 4])], 49 | [Signal(1, times=[1, 2, 3, 4]), Signal(1, times=[1, 2, 5, 6])], 50 | averagePerDocument=True, 51 | returnDetails=True, 52 | limitSearchSpace=True) 53 | self.assertEqual(result["recall"], [0.5, 0.5]) 54 | self.assertEqual(result["precision"], [1, 0.5]) 55 | self.assertEqual(result["fMeasure"], list(hmean([[0.5, 0.5], [1, 0.5]], axis=0))) 56 | 57 | # test duration 58 | result = evalCuesMutliple([Signal(1, times=[1, 2, 3])], [SparseSegmentSignal(1, [[1, 2], [1.6, 2.2]])]) 59 | self.assertEqual(result["recall"], 1) 60 | self.assertEqual(result["precision"], 2 / 3) 61 | 62 | def testSignal(self): 63 | # test sort of the values 64 | signal = Signal([5, 4, 3, 2, 1], times=[5, 4, 3, 2, 1]) 65 | self.assertEqual(list(signal.values), [1, 2, 3, 4, 5]) 66 | 67 | # test assertion of duplicate values 68 | # with self.assertRaises(AssertionError): 69 | signal = Signal([5, 4, 3, 2, 1], times=[5, 4, 3, 3, 1]) 70 | 71 | # test Qantization: remove doubles 72 | signal = Signal(1, times=[0, 1.1, 2.1, 2.5, 2.6, 2.7, 3.1]) 73 | grid = Signal(-1, times=list(range(5))) 74 | signal.quantizeTo(grid) 75 | # In case 1 value is exactly between 2 grid ticks, use the smallest one 76 | self.assertEqual(list(signal.values), [1, 1, 2, 3]) 77 | 78 | # test Quantization: maxThreshold for out of bound 79 | signal = Signal(1, times=[0, 1.1, 2.0, 2.1, 2.6, 2.7, 3.1]) 80 | signal.quantizeTo(grid, maxThreshold=0.2) 81 | self.assertEqual(list(signal.times), [0, 1, 2, 3]) 82 | self.assertEqual(list(signal.values), [1, 1, 2, 1]) 83 | 84 | # test quantization: don't remove duplicates 85 | signal = Signal(1, times=[0, 1.1, 2.0, 2.1, 2.6, 2.7, 3.1]) 86 | signal.quantizeTo(grid, 
maxThreshold=0.2, removeDuplicatedValues=False) 87 | self.assertEqual(list(signal.times), [0, 1, 2, 2, 3]) 88 | self.assertEqual(list(signal.values), [1, 1, 1, 1, 1]) 89 | 90 | # test quantization: don't remove out of bounds 91 | signal = Signal(1, times=[0, 1.1, 2.0, 2.1, 2.6, 2.7, 3.1]) 92 | signal.quantizeTo(grid, maxThreshold=0.2, removeDuplicatedValues=False, removeOutOfBound=False) 93 | self.assertEqual(list(signal.times), [0, 1, 2, 2, 2.6, 2.7, 3]) 94 | self.assertEqual(list(signal.values), [1, 1, 1, 1, 1, 1, 1]) 95 | 96 | # test getIndex 97 | signal = Signal(1, times=[0, 1.1, 2.0, 2.1, 2.6, 2.7, 3.1]) 98 | idx = signal.getIndex(2.2, toleranceWindow=0.5) 99 | self.assertEqual(idx, 3) 100 | 101 | # test clusterSignals 102 | result = Signal.clusterSignals([SparseSignal(1, [0, 1, 2, 3]), SparseSignal(0, [1, 3, 3.1])], minDistance=0, mergeValue=np.mean) 103 | self.assertEqual(list(result.times), [0, 1, 2, 3, 3.1]) 104 | self.assertEqual(list(result.values), [1, 0.5, 1, .5, 0]) 105 | 106 | 107 | def testSparseSegmentSignal(self): 108 | signal = SparseSegmentSignal(1, [[0, 1], [1, 2]]) 109 | idx = signal.getIndex(2.2, toleranceWindow=0.5) 110 | self.assertEqual(idx, 1) 111 | 112 | idx = signal.getIndex(1.2, toleranceWindow=0.5) 113 | self.assertEqual(idx, 1) 114 | 115 | def testWindow(self): 116 | # RMS value normalize by sample number 117 | w = Windowing() 118 | input = Signal(2, times=list(range(20))) 119 | result, = w.predictOne(input, Signal(0, times=[-1, 2.1, 3.1, 4.1, 20])) 120 | self.assertEqual(result[0], result[2]) 121 | 122 | def testJamsSerialization(self): 123 | aggregatedSignal = JamsSerializer.aggregateAnnotations([[{ 124 | "time": 0, 125 | "duration": 0, 126 | "confidence": 0 127 | }], [{ 128 | "time": 0, 129 | "duration": 5, 130 | "confidence": 0 131 | }]]) 132 | self.assertEqual(len(aggregatedSignal), 1) 133 | 134 | def testCoreFinder(self): 135 | cf = CoreFinder(parameterIncludeBorders=False) 136 | values = DenseSignal(range(10), 1) 137 | grid = SparseSignal(1, [0, 5, 10]) 138 | result = cf.predictOne(values, grid)[0] 139 | self.assertEqual(result.values.tolist(), [False, True]) 140 | self.assertEqual(result.times.tolist(), [[0, 5], [5, 10]]) 141 | 142 | values = SparseSignal(range(10), list(range(10))) 143 | grid = SparseSignal(1, [0, 5, 10]) 144 | result = cf.predictOne(values, grid)[0] 145 | self.assertEqual(result.values.tolist(), [False, True]) 146 | self.assertEqual(result.times.tolist(), [[0, 5], [5, 10]]) 147 | 148 | def testCheckerboard(self): 149 | """Check the addZerosStart which is not implemented yet. 
Needs to be updated.""" 150 | from automix.featureExtraction.novelty import Checkerboard 151 | c = Checkerboard() 152 | 153 | values = DenseSignal(np.random.rand(100, 5), 1) 154 | novelty1 = Checkerboard(parameterAddZerosStart=True).predictOne(values)[0] 155 | novelty2 = Checkerboard(parameterAddZerosStart=0).predictOne(values)[0] 156 | novelty3 = Checkerboard(parameterAddZerosStart=None).predictOne(values)[0] 157 | self.assertGreater(novelty2[0], novelty1[0]) 158 | self.assertEqual(novelty3[0], 0) 159 | 160 | def testResults(self): 161 | """Check that the precision is not going down with update of the code (takes time to execute)""" 162 | tp, gttp = config.GET_PAOLO_FULL(checkCompletude=True) 163 | tracks = [Track(path=path) for path in tp] 164 | gttracks = [JamsSerializer.deserializeTrack(track, agreement=0.5) for track in gttp] 165 | result = evalCuesMutliple([track.features["selectedPeaks"] for track in tracks], 166 | [track.features["switchIn"] for track in gttracks], 167 | limitSearchSpace=True) 168 | self.assertGreater(result["fMeasure"], 0.6538) 169 | 170 | 171 | if __name__ == '__main__': 172 | unittest.main() 173 | -------------------------------------------------------------------------------- /automix/transition/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Package which generates the transitions. 3 | """ -------------------------------------------------------------------------------- /automix/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility package with functions used by multiple packages 3 | """ 4 | 5 | import numpy as np 6 | 7 | from automix.utils import quantization 8 | 9 | 10 | def groupEventsInBar(beats, events): 11 | """ 12 | Compute the number of event (i.e. kicks) occuring during this bar. 13 | returns a 2D array of the form : [[downbeatTimestampInSeconds, #events],...] 14 | """ 15 | beatsTime = [beat[0] for beat in beats] 16 | events = quantization.quantize(beatsTime, events) 17 | downbeat = [beat[0] for beat in beats if beat[1] == 1] 18 | 19 | result = [[downbeat[i], len([event for event in events if event > downbeat[i] and event < downbeat[i + 1]])] 20 | for i in range(len(downbeat) - 1)] 21 | return result 22 | 23 | 24 | def getStructureFromEvents(eventCountPerBar, minBarSpawn=4): 25 | """ 26 | try to find boundaries where there is a big change in the input 27 | TODO: make this function more general 28 | """ 29 | flagBars = [[eventCountPerBar[i][0], eventCountPerBar[i][1] > 0] for i in range(len(eventCountPerBar))] 30 | changeIndexes = [i for i in range(len(flagBars) - 1) if flagBars[i][1] != flagBars[i + 1][1]] 31 | groupLengths = np.diff(changeIndexes) 32 | boundaryIndexes = [ 33 | changeIndexes[i + 1] for i in range(len(groupLengths) - 1) 34 | if groupLengths[i] >= minBarSpawn and groupLengths[i + 1] >= minBarSpawn 35 | ] 36 | boundaryTimes = [flagBars[i + 1][0] for i in boundaryIndexes] 37 | 38 | return boundaryTimes 39 | 40 | 41 | def KnuthMorrisPratt(text, pattern): 42 | '''Yields all starting positions of copies of the pattern in the text. 43 | Calling conventions are similar to string.find, but its arguments can be 44 | lists or iterators, not just strings, it returns all matches, not just 45 | the first one, and it does not need the whole text in memory at once. 
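For instance (a sketch, given that this variant collects the matches into a list instead of yielding them): KnuthMorrisPratt("abracadabra", "abra") -> [0, 7].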
46 | Whenever it yields, it will have read the text exactly up to and including 47 | the match that caused the yield.''' 48 | 49 | # allow indexing into pattern and protect against change during yield 50 | pattern = list(pattern) 51 | 52 | # build table of shift amounts 53 | shifts = [1] * (len(pattern) + 1) 54 | shift = 1 55 | for pos in range(len(pattern)): 56 | while shift <= pos and pattern[pos] != pattern[pos - shift]: 57 | shift += shifts[pos - shift] 58 | shifts[pos + 1] = shift 59 | 60 | # do the actual search 61 | startPos = 0 62 | matchLen = 0 63 | result = [] 64 | for c in text: 65 | while matchLen == len(pattern) or \ 66 | matchLen >= 0 and pattern[matchLen] != c: 67 | startPos += shifts[matchLen] 68 | matchLen -= shifts[matchLen] 69 | matchLen += 1 70 | if matchLen == len(pattern): 71 | result.append(startPos) 72 | 73 | return result 74 | 75 | 76 | def hertzToNote(freq): 77 | """ 78 | see https://pages.mtu.edu/~suits/NoteFreqCalcs.html 79 | returns the closest note corresponding to the freq in Hz 80 | i.e. : hertzToNote(130)='C' 81 | """ 82 | raise DeprecationWarning() 83 | notes = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "E#", "F", "G", "G#"] 84 | semitonesDiff = range(-36, 37) 85 | computedFreqs = [] 86 | if not computedFreqs: 87 | A4 = 440 88 | computedFreqs = [A4 * pow(1.059463094359, i) for i in semitonesDiff] 89 | ClosestSemitomeDiff = np.argmin([abs(freq - computedFreq) for computedFreq in computedFreqs]) 90 | 91 | return notes[semitonesDiff[ClosestSemitomeDiff] % len(notes)] 92 | 93 | 94 | def normalize(array): 95 | """ 96 | normalize an array by changing its values to 0 to 1 97 | """ 98 | # TODO move to a better location 99 | max = np.max(array) 100 | min = np.min(array) 101 | return ((array - min) / (max - min)).tolist() 102 | -------------------------------------------------------------------------------- /automix/utils/perceptiveLoudness/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MZehren/Automix/dfaa00a9e7c5c0938c0a9d275c07f3a3e5f87e43/automix/utils/perceptiveLoudness/__init__.py -------------------------------------------------------------------------------- /automix/utils/perceptiveLoudness/loudness.py: -------------------------------------------------------------------------------- 1 | """This module provides functions to calculate the perceptive loudness of 2 | signals in different forms. 3 | 4 | """ 5 | 6 | from bisect import bisect_left 7 | from essentia.standard import ReplayGain, Resample 8 | from librosa.core import db_to_amplitude, istft 9 | 10 | 11 | def loudnessSignal(y, sr): 12 | """Calculates the loudness of a signal. 13 | 14 | Args: 15 | y (list of float): The signal. 16 | sr (int, optional): The sample rate of the signal. 17 | 18 | Returns: 19 | float: The negative replay gain as the loudness in dB of the signal. 20 | 21 | """ 22 | 23 | # Only these samplerates are accepted by ReplayGain() 24 | supportedSamplerates = [8000, 32000, 44100, 48000] 25 | 26 | if sr in supportedSamplerates: 27 | # Sample rate is okay. 
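(8, 32, 44.1 or 48 kHz, the only rates accepted by ReplayGain(), as listed above).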
No need to change signal 28 | yNew = y 29 | srNew = sr 30 | else: 31 | # Samplerate is not okay 32 | # Resample the signal to fit 33 | 34 | # Find next higher supported sample rate 35 | idx = bisect_left(sorted(supportedSamplerates), sr) 36 | idx = min(idx, len(supportedSamplerates) - 1) 37 | srNew = supportedSamplerates[idx] 38 | # Resample signal 39 | fResample = Resample( 40 | inputSampleRate=sr, outputSampleRate=srNew, quality=0) 41 | yNew = fResample(y) 42 | 43 | fReplayGain = ReplayGain(sampleRate=srNew) 44 | loudness = -(fReplayGain(yNew) + 14) # Offset replay gain by 14 dB 45 | return loudness 46 | 47 | 48 | def normalizeSignal(y, sr): 49 | """Normalizes a signal by its loudness. 50 | 51 | Args: 52 | y (list of float): The signal. 53 | sr (int, optional): The sample rate of the signal. 54 | 55 | Returns: 56 | list of float: The signal normalized so that its loudness is 0. 57 | 58 | """ 59 | 60 | loudness = loudnessSignal(y, sr) 61 | normalizationDivisor = db_to_amplitude(loudness) 62 | yNormalized = [sample / normalizationDivisor for sample in y] 63 | return yNormalized 64 | 65 | 66 | def loudnessSTFTMatrix(matrix, sr, **kwargs): 67 | """Calculates the loudness of a signal encoded by its STFT matrix. 68 | 69 | Args: 70 | matrix (np.ndarray): STFT matrix of the actual signal. 71 | sr (int, optional): The sample rate of the input signal. 72 | **kwargs: Keywords for istft() (see 73 | https://librosa.github.io/librosa/generated/librosa.core.istft.html) 74 | 75 | Returns: 76 | float: The negative replay gain as the loudness in dB of the signal. 77 | 78 | """ 79 | 80 | # Convert STFT matrix to signal and use loudnessSignal() to obtain loudness 81 | # TODO: this is inefficient 82 | y = istft(matrix, **kwargs) 83 | return loudnessSignal(y, sr) 84 | -------------------------------------------------------------------------------- /automix/utils/quantization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions to do quantization 3 | """ 4 | import math 5 | from bisect import bisect_left 6 | from typing import List 7 | import itertools 8 | 9 | import numpy as np 10 | from scipy.stats import mode 11 | from automix.model.classes.signal import Signal 12 | 13 | 14 | def separateInEvenGrids(ticks, regularityThreshold=0.1, tickLength=None): 15 | ''' 16 | from [0,1,2,3,4,10,11,12] 17 | returns [[0,1,2,3,4],[10,11,12]] 18 | 19 | Create a new group of values if the step between two values is deviated by 10% or more of the average step between each tick 20 | the regularityThreshold can be used to fine tune the threshold 21 | ''' 22 | ticksGroups = [[ticks[0]]] 23 | ticksDiffs = np.diff(ticks) 24 | if tickLength is None: 25 | tickLength = mode(ticksDiffs)[0][0] 26 | # std = np.std(ticksDiffs) 27 | precision = regularityThreshold * tickLength 28 | for i in range(len(ticks) - 1): 29 | if abs(ticksDiffs[i] - tickLength) < precision: 30 | ticksGroups[-1].append(ticks[i + 1]) 31 | else: 32 | ticksGroups.append([ticks[i + 1]]) 33 | 34 | return [group for group in ticksGroups if len(group) > 1] 35 | 36 | 37 | def quantizeOne(grid, value): 38 | """ 39 | return the closest grid tick to the value 40 | """ 41 | closestTick = np.argmin([abs(tick - value) for tick in grid]) 42 | return grid[closestTick] 43 | 44 | 45 | # def quantize(grid, values, maxThreshold=-1, removeOutOfBound=True, removeDuplicatedValues=True): 46 | # """ 47 | # Align values to a grid 48 | # Parameters: 49 | # maxThreshold specify the maximum distance between the value and the closest 
tick in the grid. 50 | # by default it'll take half of the most common difference in the grid 51 | # removeOutOfBound: if there is no tick near the value AND removeOutOfBound, the value is removed. 52 | # if removeOutOfBound is False, then the original value is kept 53 | # removeDuplicatedValues: If two values are quantized to the same grid tick, it's going to get removed 54 | 55 | # return the list of values quantized 56 | # """ 57 | # # computes the max threshold if not specified 58 | # if maxThreshold == -1: 59 | # if len(grid) > 1: 60 | # maxThreshold = float(mode(np.diff(grid))[0][0]) / 2 61 | # else: 62 | # maxThreshold = 0 63 | 64 | # # creates a dict associating each value to his closest ground truth 65 | # alignementValues = findGridNeighbor(grid, values) 66 | 67 | # # replace the value by its closest grid tick, as long as it's close enough 68 | # alignementValues = ((originalValue, newValue if abs(originalValue - newValue) <= maxThreshold else originalValue) 69 | # for originalValue, newValue in alignementValues 70 | # if not (removeOutOfBound and abs(originalValue - newValue) > maxThreshold)) 71 | 72 | # # compute the list of results from the dictionnary 73 | # if removeDuplicatedValues: 74 | # return list(sorted(set([newValue for originalValue, newValue in alignementValues]))) 75 | # else: 76 | # return [newValue for originalValue, newValue in alignementValues] 77 | 78 | 79 | def diff(grid, values, maxThreshold=-1): 80 | """ 81 | get the difference between the ground truth values (grid) and the values. 82 | if the difference is above the maxThreshold, then the difference is considered to be zero. 83 | By default the maxThreshold is going to be the half the mean distance between to ticks in the GT values (grid) 84 | This is usefull for looking at the difference between events in two tracks. 85 | TODO: Include that in signal class ? 86 | """ 87 | gridSignal = Signal(1, times=grid) 88 | valuesSignal = Signal(1, times=values) 89 | valuesSignal.quantizeTo(gridSignal, maxThreshold=maxThreshold, removeOutOfBound=False, removeDuplicatedValues=False) 90 | return valuesSignal.times - values 91 | 92 | 93 | # def findGridNeighbor(grid, values, isSorted=False): 94 | # """ 95 | # return a list of tuples indicating the position of the closest 96 | # """ 97 | # if not isSorted: 98 | # grid = sorted(grid) 99 | 100 | # # return ((value, grid[np.argmin([abs(value - tick) for tick in grid])]) for value in values) 101 | # # return ((value, min(grid, key=lambda x:abs(x-value))) for value in values) 102 | # return [(value, findNeighboor(grid, value)) for value in values] 103 | 104 | # def findNeighboor(grid, value): 105 | # """ 106 | # Assumes grid is sorted. Returns closest value to value. 107 | # If two numbers are equally close, return the smallest number. 108 | # """ 109 | # pos = bisect_left(grid, value) 110 | 111 | # if pos == 0: 112 | # return grid[0] 113 | # if pos == len(grid): 114 | # return grid[-1] 115 | # before = grid[pos - 1] 116 | # after = grid[pos] 117 | # if after - value < value - before: 118 | # return after 119 | # else: 120 | # return before 121 | 122 | 123 | def extendGrid(refTick, ticks, trackLength, approximateTickDuration, SnapDistance=0.05): 124 | """ 125 | Extends an array of ticks. 
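Starting from the reference tick, it steps forward and backward by approximateTickDuration,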
fills holes and make the grid even by snapping ticks too far away from the point of reference 126 | """ 127 | joinThreshold = approximateTickDuration * SnapDistance 128 | iT = refTick[0] # index Time 129 | iL = refTick[1] # index Label 130 | result = [] 131 | while iT < trackLength: 132 | # if there is a beat next to what we expect 133 | closeBeat = [beat for beat in ticks if math.fabs(beat[0] - iT) < joinThreshold] 134 | if len(closeBeat) == 1: 135 | iT = closeBeat[0][0] 136 | result.append([iT, iL]) 137 | else: 138 | result.append([iT, iL]) 139 | 140 | iT = iT + approximateTickDuration 141 | iL = iL + 1 142 | if iL == 5: 143 | iL = 1 144 | 145 | # TODO: factorise the code 146 | iT = refTick[0] - approximateTickDuration # index Time 147 | iL = refTick[1] - 1 # index Label 148 | if iL == 0: 149 | iL = 4 150 | 151 | while iT >= joinThreshold * -1: 152 | # if there is a beat next to what we expect 153 | closeBeat = [beat for beat in ticks if math.fabs(beat[0] - iT) < joinThreshold] 154 | if len(closeBeat) == 1: 155 | iT = closeBeat[0][0] 156 | result.insert(0, [iT, iL]) 157 | elif iT >= 0: 158 | result.insert(0, [iT, iL]) 159 | 160 | iT = iT - approximateTickDuration 161 | iL = iL - 1 162 | if iL == 0: 163 | iL = 4 164 | 165 | return result 166 | 167 | 168 | def clusterValues(values: List[float], minDistance=0.5): 169 | """ 170 | Returns a list of cluster points based on the minDistance 171 | The cluster center are at the position having the most occurences 172 | 173 | clusterValues([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]) -> 1 174 | 175 | """ 176 | raise DeprecationWarning() 177 | uniqueValues = {} 178 | for value in values: 179 | uniqueValues[value] = 1 if value not in uniqueValues else uniqueValues[value] + 1 180 | 181 | results = [] 182 | for i in uniqueValues.keys(): 183 | topFlag = True 184 | for j in uniqueValues.keys(): 185 | if abs(i - j) < minDistance and uniqueValues[i] <= uniqueValues[j]: 186 | if uniqueValues[i] == uniqueValues[j]: 187 | if i < j: 188 | topFlag = False 189 | else: 190 | topFlag = False 191 | if topFlag: 192 | results.append(i) 193 | 194 | results.sort() 195 | return results 196 | 197 | 198 | def findPhase(signal: Signal, grid: Signal, period: int, toleranceWindow=0): 199 | """ 200 | find the phase of the signal based on it's amplitude at the grid positions and the number of peaks 201 | - signal: works best with a discrete signal as no aglomeration is done 202 | - grid: positions of the beats 203 | - period: the periodicity to test 204 | - tolerance window: if not at 0, returns the closest value in the signal to the grid, within the tolerance window 205 | 206 | test: 207 | # result = findPhase(Signal(np.ones(5), times=np.array([0, 4, 8, 9, 12])+1), Signal(np.ones(16), times=range(16)), period=4) 208 | # print(result) = 1 209 | """ 210 | phases = [] 211 | for phase in range(period): 212 | values = [signal.getValue(grid.times[i], toleranceWindow=0) for i in range(phase, len(grid), period)] 213 | phases.append((np.sum([v for v in values if v is not None]) * len(values))) 214 | 215 | bestPhase = np.argmax(phases) 216 | return bestPhase 217 | -------------------------------------------------------------------------------- /bin/mainCues.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from automix import config 4 | from automix.model.classes.track import Track 5 | 6 | 7 | def main(): 8 | # Load the tracks 9 | parser = argparse.ArgumentParser(description='Estimate the cue in points') 10 | 
parser.add_argument('folder', type=str, help="Path to the input folder containing tracks.") 11 | args = parser.parse_args() 12 | tracks = [Track(path=path) for path in config.getFolderFiles(args.folder)] 13 | 14 | # Estimate the cue points 15 | for t in tracks: 16 | cues = t.getCueIns() 17 | print(t, cues.values, cues.times) 18 | 19 | 20 | if __name__ == '__main__': 21 | main() 22 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | . 2 | #----install madmom for drums detection---- 3 | #This is a branch of madmom, it doesn't contain the last update of the main project 4 | #You have to install it in a different environment because it's not possible to install two versions of the same repository. 5 | # virtualenv madmomDrumsEnv 6 | # #install the dependencies. again, it's a hack here. 7 | # madmomDrumsEnv/bin/pip install madmom 8 | # madmomDrumsEnv/bin/pip uninstall madmom 9 | # #install from the source `: http://www.ifs.tuwien.ac.at/~vogl/models/mirex-17.zip` 10 | # wget http://www.ifs.tuwien.ac.at/~vogl/models/mirex-17.zip 11 | # unzip mirex-17.zip 12 | # tar -xvzf madmom-0.16.dev0.tar.gz 13 | # cd madmom-0.16.dev0/ 14 | # ../madmomDrumsEnv/bin/python setup.py develop --user 15 | # #check if it's working 16 | # ../madmomDrumsEnv/bin/python ../madmom-0.16.dev0/bin/DrumTranscriptor 17 | # #clean everything 18 | # rm mirex-17.zip 19 | # rm madmom-0.16.dev0.tar.gz 20 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import setuptools 3 | 4 | with open("README.md", "r") as fh: 5 | long_description = fh.read() 6 | setuptools.setup( 7 | name='AutoMix', 8 | version='0.1', 9 | author="Mickael Zehren", 10 | author_email="mickael.zehren@gmail.com", 11 | description="Automatic DJ-mixing of tracks", 12 | long_description=long_description, 13 | install_requires=[ 14 | "numpy", "scipy", "cython", "matplotlib", "pandas", "pyaudio", "madmom", "librosa", "essentia", "youtube-dl", "scdl", 15 | "mir_eval", "msaf", "graphviz" 16 | ], 17 | packages=setuptools.find_packages(), 18 | classifiers=[ 19 | "Programming Language :: Python :: 3", 20 | "License :: OSI Approved :: MIT License", 21 | ], 22 | ) 23 | -------------------------------------------------------------------------------- /vendors/install dep.sh: -------------------------------------------------------------------------------- 1 | #----base---- 2 | python -m pip install --upgrade pip 3 | pip install numpy 4 | pip install scipy 5 | pip install cython 6 | pip install matplotlib 7 | pip install findiff 8 | 9 | #----install madmom---- 10 | #might fail: https://github.com/SlapBot/stephanie-va/issues/8 (sudo apt-get install libportaudio2) 11 | sudo apt-get install portaudio19-dev python-pyaudio python3-pyaudio 12 | pip install pyaudio 13 | pip install madmom 14 | 15 | #----install librosa---- 16 | pip install librosa # ==0.5.1 NB < 6.0 needed for msaf 17 | apt-get install python-tk 18 | apt-get install ffmpeg 19 | 20 | #----install essentia---- 21 | pip install essentia 22 | 23 | #---- install mir_eval---- 24 | pip install mir_eval 25 | 26 | #---- install msaf---- 27 | pip install msaf 28 | 29 | #----install graph_viz---- 30 | pip install graphviz 31 | 32 | #----install IO downloader---- 33 | sudo -H pip install --upgrade youtube-dl 34 | 35 | #----install madmom for drums 
detection---- 36 | #This is a branch of madmom, it doesn't contain the last update of the main project 37 | #You have to install it in a different environment because it's not possible to install two versions of the same repository. 38 | # Updated to python3 (python3 -m venv madmomDrumsEnv) doesn't work for the installation. for now keeping python2 39 | virtualenv madmomDrumsEnv 40 | 41 | #install the dependencies. 42 | madmomDrumsEnv/bin/pip install numpy 43 | madmomDrumsEnv/bin/pip install scipy 44 | madmomDrumsEnv/bin/pip install cython 45 | madmomDrumsEnv/bin/pip install nose 46 | #might fail ? 47 | sudo apt-get install python-dev 48 | madmomDrumsEnv/bin/pip install pyaudio 49 | 50 | #install from the source `: http://www.ifs.tuwien.ac.at/~vogl/models/mirex-17.zip` 51 | wget http://www.ifs.tuwien.ac.at/~vogl/models/mirex-18.tar.gz 52 | tar -xvzf mirex-18.tar.gz 53 | cd madmom-0.16.dev0/ 54 | # --user seems not available anymore 55 | ../madmomDrumsEnv/bin/python setup.py develop 56 | #check if it's working 57 | cd .. 58 | madmomDrumsEnv/bin/python madmom-0.16.dev0/bin/DrumTranscriptor 59 | #clean everything 60 | rm mirex-18.tar.gz 61 | 62 | #----install soundfile decoding library 63 | pip install audioread 64 | 65 | #----Vocal Melody Extraction 66 | git clone https://github.com/s603122001/Vocal-Melody-Extraction.git 67 | cd Vocal-Melody-Extraction/ 68 | rm -rf .git 69 | echo "!!!! You have to download manually: https://drive.google.com/file/d/13kApyZ5lJEGE5CDwaeEuxVuw9sZy_xae/view !!!! (can't use wget with google drive") 70 | echo "I put the models in the folder, added a setup.py to install the project" 71 | pip install -e . 72 | echo "then I changed the loading function in melodyExt from soundfile to librosa to work with mp3." 73 | # Then add the dependencies 74 | pip install tensorflow 75 | pip install keras 76 | pip install soundfile 77 | pip install tqdm 78 | 79 | 80 | 81 | #----other---- 82 | pip install mongoengine 83 | 84 | #----Not used anymore---- 85 | #install jupyter 86 | #python -m pip install jupyter 87 | 88 | #install melodia 89 | #move the melodia's file to a vamp folder 90 | #pip install vamp 91 | 92 | #install msa 93 | #pip install msaf 94 | --------------------------------------------------------------------------------