├── files ├── inputs │ └── .gitkeep └── outputs │ └── .gitkeep ├── neural_loop_combiner ├── __init__.py ├── utils │ ├── __init__.py │ ├── features.py │ ├── visualize.py │ ├── comparison.py │ ├── manipulate.py │ ├── seperate.py │ └── utils.py ├── config │ ├── __init__.py │ ├── database.py │ └── settings.py ├── dataset │ ├── __init__.py │ ├── pipeline │ │ ├── __init__.py │ │ ├── pipeline.py │ │ ├── refintor.py │ │ └── extractor.py │ ├── sampler │ │ ├── __init__.py │ │ ├── manipuler.py │ │ └── sampler.py │ ├── tagger.py │ ├── dataSampler.py │ └── dataset.py ├── models │ ├── losses.py │ ├── models.py │ └── datasets.py └── trainer │ └── trainer.py ├── .gitignore ├── LICENCE.md ├── create_dataset.py ├── README.md ├── requirements.txt ├── data_preprocess.py └── train.py /files/inputs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /files/outputs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /neural_loop_combiner/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /neural_loop_combiner/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /neural_loop_combiner/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /neural_loop_combiner/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import * -------------------------------------------------------------------------------- /neural_loop_combiner/dataset/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline import * 2 | -------------------------------------------------------------------------------- /neural_loop_combiner/dataset/sampler/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import * 2 | from .manipuler import * 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # exclude everything 2 | 3 | files/inputs/* 4 | files/outputs/* 5 | 6 | pretrained_models/* 7 | pretrained_models 8 | 9 | loopextractor/* 10 | loopextractor 11 | 12 | *.pyc 13 | .ipynb_checkpoints 14 | */.ipynb_checkpoints/* 15 | 16 | # exception to the rule 17 | 18 | !files/inputs/.gitkeep 19 | !files/outputs/.gitkeep 20 | -------------------------------------------------------------------------------- /neural_loop_combiner/utils/features.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings('ignore') 3 | import os 4 | import librosa 5 | 6 | def get_melspectrogram(audio, sr, n_fft=2048, hop_length=512, n_mels=128): 7 | S = librosa.feature.melspectrogram(audio, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels) 8 | S = librosa.util.normalize(S) 9 | return S -------------------------------------------------------------------------------- 
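Note (not part of the repo): with the repository defaults (SR = 44100, DUR = 2, hop_length = 512, n_mels = 128), get_melspectrogram returns a 128 x 173 array per loop, since 1 + floor(2 * 44100 / 512) = 173 frames; this matches the hard-coded nn.Linear(4 * 128 * 173, 256) input size in neural_loop_combiner/models/models.py. A minimal sanity check, using random audio as a stand-in for a real loop:

```python
import numpy as np
from neural_loop_combiner.utils.features import get_melspectrogram

sr, dur = 44100, 2
audio = np.random.randn(sr * dur).astype(np.float32)  # stand-in for a 2-second loop
S = get_melspectrogram(audio, sr)
print(S.shape)  # (128, 173) -> flattened conv input 4 * 128 * 173 in models.py
```
--------------------------------------------------------------------------------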
/neural_loop_combiner/config/database.py: -------------------------------------------------------------------------------- 1 | from neural_loop_combiner.config import settings 2 | from pymongo import MongoClient 3 | 4 | 5 | def initialize_database(col, 6 | db = settings.MONGODB_DB, 7 | server = settings.MONGODB_SERVER, 8 | port = settings.MONGODB_PORT, 9 | username = settings.MONGO_USERNAME, 10 | password = settings.MONGO_PASSWORD): 11 | return MongoClient(server, port, username=username, password=password)[db][col] -------------------------------------------------------------------------------- /neural_loop_combiner/utils/visualize.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import numpy as np 3 | 4 | def plot_layout(layout, save_path=''): 5 | x = np.array([ i for i in range(0, layout.shape[0] + 1)]) 6 | y = np.array([ i for i in range(0, layout.shape[1] + 1)]) 7 | plt.pcolormesh(x, y, layout.T) 8 | plt.ylabel('loop') 9 | plt.xlabel('bar') 10 | 11 | if save_path != '': 12 | if '.png' not in save_path: 13 | save_path = f'{save_path}.png' 14 | plt.savefig(save_path) 15 | 16 | plt.show() -------------------------------------------------------------------------------- /neural_loop_combiner/dataset/tagger.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings("ignore") 3 | 4 | import os 5 | import random 6 | import librosa 7 | import numpy as np 8 | from neural_loop_combiner.config import settings 9 | from neural_loop_combiner.utils.seperate import tag_loop_type 10 | from neural_loop_combiner.utils.utils import log_message, data_exclude 11 | 12 | class Tagger: 13 | 14 | def __init__(self, loop_path): 15 | self.sr = settings.SR 16 | self.cache = settings.CACHE 17 | self.out_dir = settings.OUT_DIR 18 | self.loop_path = loop_path 19 | 20 | 21 | def tag(self): 22 | return tag_loop_type(self.loop_path, self.sr, self.cache, self.out_dir) -------------------------------------------------------------------------------- /neural_loop_combiner/models/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from neural_loop_combiner.config import settings 5 | 6 | class ContrastiveLoss(nn.Module): 7 | def __init__(self, margin=settings.MARGIN): 8 | super(ContrastiveLoss, self).__init__() 9 | self.margin = margin 10 | self.eps = 1e-9 11 | 12 | def forward(self, output1, output2, target, size_average=True): 13 | distances = torch.pow(F.pairwise_distance(output1, output2), 2) 14 | losses = 0.5 * (target.float() * distances + 15 | (1 + -1 * target).float() * F.relu(self.margin - (distances + self.eps).sqrt()).pow(2)) 16 | return losses.mean() if size_average else losses.sum() -------------------------------------------------------------------------------- /neural_loop_combiner/config/settings.py: -------------------------------------------------------------------------------- 1 | # Database 2 | MONGODB_SERVER = '127.0.0.1' 3 | MONGODB_PORT = 27017 4 | MONGODB_DB = 'nlc' # neural_loop_combiner 5 | 6 | MONGO_USERNAME = '' 7 | MONGO_PASSWORD = '' 8 | 9 | MONGODB_TRACK_COL = 'tracks' 10 | MONGODB_LOOP_COL = 'loops' 11 | MONGODB_TAG_COL = 'tags' 12 | MONGODB_DATASET_COL = 'datasets' 13 | MONGODB_MODEL_COL = 'models' 14 | 15 | # Directory 16 | INT_DIR = 'files/inputs' # put tracks you want to extract here 17 | OUT_DIR = 'files/outputs' 18 
| 19 | # Others 20 | DUR = 2 21 | SR = 44100 22 | CACHE = True 23 | LOG = True 24 | 25 | # Threshold 26 | HASH_TYPE = 'ahash' 27 | HASH_THRESHOLD = 5 28 | EXISTED_THRESHOLD = 0.2 29 | 30 | # Datasets 31 | TEST_SIZE = 100 32 | SPLIT_RATIO = 0.8 33 | NG_TYPES = { 34 | 'shift' : 1, 35 | 'reverse' : 1, 36 | 'rearrange': 1, 37 | 'random' : 1, 38 | 'selected' : 1 39 | } 40 | 41 | 42 | # Models (default settings) 43 | LR = 0.001 44 | MARGIN = 2 45 | EPOCHS = 2 46 | BATCH_SIZE = 128 47 | LOG_INTERVAL = 10 -------------------------------------------------------------------------------- /LICENCE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Bo-Yu Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /neural_loop_combiner/utils/comparison.py: -------------------------------------------------------------------------------- 1 | import imagehash 2 | from PIL import Image 3 | from skimage.measure import compare_ssim 4 | from imagehash import average_hash, phash, dhash, whash 5 | from mir_eval.separation import bss_eval_sources_framewise 6 | 7 | from neural_loop_combiner.config import settings 8 | 9 | 10 | def ssim_similarity(array_1, array_2): 11 | if len(array_1) > len(array_2): 12 | return compare_ssim(array_1[:len(array_2)], array_2) 13 | else: 14 | return compare_ssim(array_1, array_2[:len(array_1)]) 15 | 16 | 17 | def spec_similarity(spec1, spec2, hash_type=settings.HASH_TYPE): 18 | img1, img2 = Image.fromarray(spec1), Image.fromarray(spec2) 19 | 20 | if hash_type == 'ahash': 21 | hash1, hash2 = average_hash(img1), average_hash(img2) 22 | elif hash_type == 'phash': 23 | hash1, hash2 = phash(img1), phash(img2) 24 | elif hash_type == 'dhash': 25 | hash1, hash2 = dhash(img1), dhash(img2) 26 | elif hash_type == 'whash': 27 | hash1, hash2 = whash(img1), whash(img2) 28 | 29 | return hash1 - hash2 30 | 31 | def snr_cal(ref_audio, estm_audio): 32 | 33 | if len(ref_audio) > len(estm_audio): 34 | ref_audio = ref_audio[:len(estm_audio)] 35 | elif len(ref_audio) < len(estm_audio): 36 | estm_audio = estm_audio[:len(ref_audio)] 37 | 38 | return bss_eval_sources_framewise(ref_audio, estm_audio)[0][0][0] -------------------------------------------------------------------------------- /neural_loop_combiner/utils/manipulate.py: -------------------------------------------------------------------------------- 1 | import random 2 | import librosa 3 | import numpy as np 4 | import pyrubberband as pyrb 5 | from pysndfx import AudioEffectsChain 6 | 7 | def time_stretch(src_audio, tgt_dur, sr): 8 | src_dur = librosa.get_duration(src_audio, sr) 9 | return pyrb.time_stretch(src_audio, sr, src_dur / tgt_dur) 10 | 11 | 12 | def loops_stretch(loops, dur, sr): 13 | return {key: time_stretch(loops[key], dur, sr) for key in loops} 14 | 15 | def split_audio(audio, beats, sr): 16 | beats_sample = librosa.time_to_samples(beats, sr=sr) 17 | audio_split = [audio[beats_sample[i]:beats_sample[i+1]]for i in range(len(beats_sample)-1)] 18 | return audio_split 19 | 20 | def reverse_audio(audio): 21 | fx = AudioEffectsChain().reverse() 22 | return fx(audio) 23 | 24 | def shift_audio(audio, beats, sr): 25 | step = random.randint(1, len(beats)-2) * random.choice([-1, 1]) 26 | audio_split = split_audio(audio, beats, sr) 27 | audio_shift = np.array([]) 28 | for i in range(len(audio_split)): 29 | audio_shift = np.concatenate((audio_shift, audio_split[(i + step)%len(audio_split)])) 30 | return audio_shift 31 | 32 | 33 | def rearrange_audio(audio, beats, sr): 34 | audio_split = split_audio(audio, beats, sr) 35 | audio_rearrange = np.array([]) 36 | order = [i for i in range(0, len(audio_split))] 37 | order_rag = [i for i in range(0, len(audio_split))] 38 | while(1): 39 | random.shuffle(order_rag) 40 | if order != order_rag: 41 | break 42 | for index in order_rag: 43 | audio_rearrange = np.concatenate((audio_rearrange, audio_split[index])) 44 | return audio_rearrange 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /neural_loop_combiner/dataset/sampler/manipuler.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 
warnings.filterwarnings("ignore") 3 | 4 | import filetype 5 | import os, librosa 6 | import numpy as np 7 | 8 | from neural_loop_combiner.config import settings 9 | from neural_loop_combiner.utils.utils import log_message, get_save_dir, save_audio, remove_prefix_dir 10 | from neural_loop_combiner.utils.manipulate import reverse_audio, shift_audio, rearrange_audio 11 | 12 | class Manipuler: 13 | 14 | def __init__(self, file_path, mnp_type, log_info=[]): 15 | 16 | self.sr = settings.SR 17 | self.dur = settings.DUR 18 | self.log = settings.LOG 19 | self.out_dir = settings.OUT_DIR 20 | self.file_path = os.path.join(self.out_dir, file_path) 21 | self.beats = [i / self.dur for i in range(0, 5)] 22 | self.log_info = log_info 23 | self.mnp_type = mnp_type 24 | self.audio = librosa.load(self.file_path, self.sr)[0] 25 | 26 | self.audio_manipulation() 27 | 28 | def audio_manipulation(self): 29 | 30 | sr = self.sr 31 | audio = self.audio 32 | beats = self.beats 33 | mnp_type = self.mnp_type 34 | 35 | if mnp_type == 'shift': 36 | self.mnp_audio = shift_audio(audio, beats, sr) 37 | elif mnp_type == 'reverse': 38 | self.mnp_audio = reverse_audio(audio) 39 | elif mnp_type == 'rearrange': 40 | self.mnp_audio = rearrange_audio(audio, beats, sr) 41 | 42 | 43 | def save_outputs(self): 44 | sr = self.sr 45 | out_dir = self.out_dir 46 | file_path = self.file_path 47 | mnp_type = self.mnp_type 48 | mnp_audio = self.mnp_audio 49 | saved_name = os.path.split(file_path)[-1] 50 | saved_dir = get_save_dir(out_dir, ['mnp', mnp_type]) 51 | saved_path = os.path.join(saved_dir, saved_name) 52 | save_audio(saved_path, mnp_audio, sr) 53 | 54 | 55 | return remove_prefix_dir(saved_path, out_dir) -------------------------------------------------------------------------------- /neural_loop_combiner/dataset/dataSampler.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings("ignore") 3 | 4 | import os 5 | import random 6 | import librosa 7 | import numpy as np 8 | from spleeter.separator import Separator 9 | from spleeter.audio.adapter import get_default_audio_adapter 10 | from neural_loop_combiner.utils.utils import log_message, data_exclude 11 | from neural_loop_combiner.config import settings 12 | from neural_loop_combiner.dataset.sampler import Sampler 13 | 14 | class DataSampler: 15 | 16 | def __init__(self, tracks_key, tracks_dict, idv_datas, harm_datas, data_type, log_info=[]): 17 | 18 | self.sr = settings.SR 19 | self.cache = settings.CACHE 20 | self.dur = settings.DUR 21 | self.log = settings.LOG 22 | self.out_dir = settings.OUT_DIR 23 | self.ng_types = [neg_type for neg_type in settings.NG_TYPES.keys() if settings.NG_TYPES[neg_type] == 1] 24 | 25 | self.data_type = data_type 26 | self.idv_datas = idv_datas 27 | self.harm_datas = harm_datas 28 | 29 | self.tracks_key = tracks_key 30 | self.tracks_dict = tracks_dict 31 | 32 | 33 | def sampling(self): 34 | tracks_key = self.tracks_key 35 | tracks_dict = self.tracks_dict 36 | ng_types = self.ng_types 37 | idv_datas = self.idv_datas 38 | harm_datas = self.harm_datas 39 | data_type = self.data_type 40 | log = self.log 41 | neg_datas = {ng_type:[] for ng_type in ng_types} 42 | total = len(tracks_key) 43 | 44 | for i, track_key in enumerate(tracks_key): 45 | excl_datas = tracks_dict[track_key]['loops_path'] 46 | pair_datas = tracks_dict[track_key]['pairs_path'] 47 | other_datas = data_exclude(idv_datas, excl_datas) 48 | for pair_data in pair_datas: 49 | neg_dict = Sampler(pair_data, other_datas, 
harm_datas).sampling()
50 |                     for neg_type in neg_dict:
51 |                         neg_datas[neg_type].append(neg_dict[neg_type])
52 |             if log: log_message(f'Negative Sampling processing ({data_type})', [i+1, total])
53 |         if log: log_message(f'Negative Sampling completed ({data_type})')
54 |         return neg_datas
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
--------------------------------------------------------------------------------
/neural_loop_combiner/utils/seperate.py:
--------------------------------------------------------------------------------
1 | import os
2 | os.environ['KMP_WARNINGS'] = '0'
3 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
4 | import warnings
5 | warnings.filterwarnings("ignore")
6 | import tensorflow as tf
7 | tf.get_logger().setLevel('ERROR')
8 | 
9 | import librosa
10 | import filetype
11 | 
12 | from spleeter.separator import Separator
13 | from spleeter.audio.adapter import get_default_audio_adapter
14 | from neural_loop_combiner.utils.comparison import ssim_similarity
15 | from neural_loop_combiner.utils.utils import check_files_exist
16 | from neural_loop_combiner.utils.utils import save_audio, get_save_dir
17 | 
18 | def ssp(file_path, sr, cache, out_dir):
19 |     file_path   = os.path.join(out_dir, file_path)
20 |     file_name   = os.path.split(file_path)[-1].split(f'.{filetype.guess(file_path).extension}')[0]
21 |     cache_dir   = get_save_dir(out_dir, ['ssp', file_name])
22 |     cache_paths = [os.path.join(cache_dir, f'{path}.wav') for path in ['track', 'perc', 'bass', 'harm']]
23 |     paths_exist = check_files_exist(cache_paths)
24 | 
25 |     if cache and paths_exist:
26 |         # reuse the cached stems instead of re-running the (slow) separation
27 |         ssp_audios = [librosa.load(cache_path, sr)[0] for cache_path in cache_paths]
28 |     else:
29 |         separator    = Separator('spleeter:5stems', multiprocess=False)
30 |         audio_loader = get_default_audio_adapter()
31 |         waveform, _  = audio_loader.load(file_path, sample_rate=sr)
32 |         prediction   = separator.separate(waveform)
33 | 
34 |         audio = waveform[:, 0]
35 |         perc  = prediction['drums'][:, 0]
36 |         bass  = prediction['bass'][:, 0]
37 |         harm  = prediction['piano'][:, 0] + prediction['vocals'][:, 0] + prediction['other'][:, 0]
38 |         ssp_audios = [audio, perc, bass, harm]
39 | 
40 |         if cache:
41 |             # save eagerly; a lazy map() here would never execute
42 |             for cache_path, ssp_audio in zip(cache_paths, ssp_audios):
43 |                 save_audio(cache_path, ssp_audio, sr)
44 | 
45 |     return ssp_audios
46 | 
47 | 
48 | def tag_loop_type(file_path, sr, cache, out_dir):
49 |     ssp_audios = ssp(file_path, sr, cache, out_dir)
50 |     audio, perc, bass, harm = ssp_audios
51 | 
52 |     perc_score = ssim_similarity(audio, perc)
53 |     harm_score = ssim_similarity(audio, harm)
54 |     bass_score = ssim_similarity(audio, bass)
55 | 
56 |     if perc_score < 0.5 and bass_score < 0.5:
57 |         return 'harm'
58 |     else:
59 |         if perc_score > harm_score:
60 |             if perc_score > bass_score:
61 |                 return 'perc'
62 |             else:
63 |                 return 'bass'
64 |         else:
65 |             return 'harm'
66 | 
--------------------------------------------------------------------------------
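A minimal usage sketch for tag_loop_type (not part of the repo; the loop path below is hypothetical, real paths are stored relative to OUT_DIR):

```python
from neural_loop_combiner.config import settings
from neural_loop_combiner.utils.seperate import tag_loop_type

# separates the loop into stems with Spleeter, then labels it by which
# stem (drums / bass / the rest) the original loop most resembles
loop_tag = tag_loop_type('refined/loops/my_track_0.wav', settings.SR, settings.CACHE, settings.OUT_DIR)
print(loop_tag)  # one of 'perc', 'bass', 'harm'
```
--------------------------------------------------------------------------------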
/create_dataset.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings("ignore")
3 | 
4 | import argparse, os
5 | import numpy as np
6 | 
7 | from neural_loop_combiner.config import settings
8 | from neural_loop_combiner.dataset import Dataset
9 | from neural_loop_combiner.dataset.tagger import Tagger
10 | from neural_loop_combiner.utils.seperate import tag_loop_type
11 | from neural_loop_combiner.config.database import initialize_database
12 | from neural_loop_combiner.utils.utils import log_message
13 | 
14 | 
15 | 
16 | def loops_tag(col_tags, tracks, tag):
17 |     if settings.NG_TYPES['selected']:
18 |         if tag == 1:
19 |             log_message('Tag started')
20 |             loops_path = [loop_path for track in tracks for loop_path in track['loops_path']]
21 |             for i, loop_path in enumerate(loops_path):
22 |                 log_info  = [i + 1, len(loops_path)]
23 |                 find_item = col_tags.find_one({'loop_path': loop_path})
24 |                 if find_item:
25 |                     log_message(f'{loop_path} existed', log_info)
26 |                 else:
27 |                     loop_tag = Tagger(loop_path).tag()
28 |                     log_message(f'{loop_path} tagged', log_info)
29 |                     col_tags.save({'loop_path': loop_path, 'tag': loop_tag})
30 |             log_message('Tag completed')
31 |         harm_datas = [loop['loop_path'] for loop in col_tags.find({'tag': 'harm'})]
32 |     else:
33 |         harm_datas = []
34 | 
35 |     return harm_datas
36 | 
37 | def dataset_creation(col_datasets, tracks, harm_datas):
38 |     tracks_dict = {track['file_name']: track for track in tracks}
39 |     log_message('Create dataset started')
40 |     dataset = Dataset(tracks_dict, harm_datas).datas_retrieve()
41 |     col_datasets.save(dataset)
42 |     log_message('Create dataset completed')
43 | 
44 | 
45 | 
46 | if __name__ == '__main__':
47 | 
48 |     parser = argparse.ArgumentParser(description='Dataset Creation')
49 |     parser.add_argument('--tag', help='tag or not', type=int, default=1)
50 | 
51 |     tag          = parser.parse_args().tag
52 |     col_loops    = initialize_database(settings.MONGODB_LOOP_COL)
53 |     col_datasets = initialize_database(settings.MONGODB_DATASET_COL)
54 |     col_tags     = initialize_database(settings.MONGODB_TAG_COL)
55 |     # materialize the cursor: it is iterated in both loops_tag and dataset_creation
56 |     tracks       = list(col_loops.find({'$where':'this.pairs_path.length >= 1'}))
57 | 
58 |     # Loops Tag
59 |     harm_datas = loops_tag(col_tags, tracks, tag)
60 |     # Dataset Creation
61 |     dataset_creation(col_datasets, tracks, harm_datas)
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
--------------------------------------------------------------------------------
/neural_loop_combiner/dataset/sampler/sampler.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings("ignore")
3 | 
4 | import os
5 | import random
6 | import librosa
7 | import numpy as np
8 | 
9 | from neural_loop_combiner.config import settings
10 | from neural_loop_combiner.utils.seperate import tag_loop_type
11 | from neural_loop_combiner.utils.utils import data_include
12 | from neural_loop_combiner.dataset.sampler.manipuler import Manipuler
13 | 
14 | class Sampler:
15 | 
16 |     def __init__(self, pair_path, others_path, harm_datas, log_info=[]):
17 | 
18 |         self.sr      = settings.SR
19 |         self.cache   = settings.CACHE
20 |         self.dur     = settings.DUR
21 |         self.log     = settings.LOG
22 |         self.out_dir = settings.OUT_DIR
23 | 
24 |         self.log_info    = log_info
25 |         self.pair_path   = pair_path
26 |         self.others_path = others_path
27 |         self.harm_datas  = harm_datas
28 |         self.map_dict    = {}
29 |         self.ng_types    = [neg_type for neg_type in settings.NG_TYPES.keys() if settings.NG_TYPES[neg_type] == 1]
30 | 
31 |     def shuffle_pair_path(self):
32 |         pair_path = self.pair_path
33 |         mnp_index = random.randint(0, 1)
34 |         stc_index = 1 - mnp_index
35 |         return pair_path[stc_index], pair_path[mnp_index]
36 | 
37 |     def pair_manipulation(self, mnp_type):
38 |         stc_path, mnp_path = self.shuffle_pair_path()
39 |         new_mnp_path = Manipuler(mnp_path, mnp_type).save_outputs()
40 |         return [stc_path, new_mnp_path]
41 | 
42 |     def random_paring(self):
43 |         others_path = self.others_path
44 |         stc_path, _ = self.shuffle_pair_path()
45 |         mnp_path    = random.choice(others_path)
46 |         return [stc_path, mnp_path]
47 | 
48 |     def selected_paring(self):
49 | others_path = data_include(self.others_path, self.harm_datas) 50 | stc_path, _ = self.shuffle_pair_path() 51 | mnp_path = random.choice(others_path) 52 | 53 | return [stc_path, mnp_path] 54 | 55 | def choose_sampling_method(self, method): 56 | if method in ['shift', 'reverse', 'rearrange']: 57 | return self.pair_manipulation(method) 58 | elif method == 'random': 59 | return self.random_paring() 60 | elif method == 'selected': 61 | return self.selected_paring() 62 | 63 | def sampling(self): 64 | ng_types = self.ng_types 65 | ng_dict = {} 66 | 67 | for ng_type in ng_types: 68 | ng_dict[ng_type] = self.choose_sampling_method(ng_type) 69 | 70 | return ng_dict 71 | 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /neural_loop_combiner/dataset/pipeline/pipeline.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings("ignore") 3 | 4 | import os, librosa 5 | import numpy as np 6 | 7 | from neural_loop_combiner.utils.utils import get_save_dir, save_audio, save_np 8 | from neural_loop_combiner.utils.utils import pair_table_creation, remove_prefix_dir 9 | from neural_loop_combiner.config import settings 10 | from neural_loop_combiner.utils.manipulate import time_stretch 11 | 12 | from neural_loop_combiner.dataset.pipeline.extractor import Extractor 13 | from neural_loop_combiner.dataset.pipeline.refintor import Refintor 14 | 15 | 16 | class Pipeline: 17 | 18 | def __init__(self, file_name, media_type, gpu_num, log_info=[]): 19 | 20 | self.sr = settings.SR 21 | self.log = settings.LOG 22 | self.cache = settings.CACHE 23 | self.int_dir = settings.INT_DIR 24 | self.out_dir = settings.OUT_DIR 25 | 26 | self.log_info = log_info 27 | self.file_name = file_name 28 | self.file_path = os.path.join(self.int_dir, f'{file_name}.{media_type}') 29 | 30 | self.extractor = Extractor(file_name, media_type, gpu_num, self.log_info) 31 | self.refinter = Refintor(self.extractor, self.log_info) 32 | 33 | 34 | def start(self): 35 | 36 | file_name = self.file_name 37 | loops = self.refinter.loops 38 | layout = self.refinter.layout 39 | template = self.refinter.template 40 | 41 | loops_path = self.save_outputs(file_name, 'refined', loops, layout, template) 42 | pairs_path = self.pair_paths_creation(template, loops_path) 43 | 44 | return { 45 | 'file_name' : file_name, 46 | 'loops_path': loops_path, 47 | 'pairs_path': pairs_path 48 | } 49 | 50 | 51 | 52 | def pair_paths_creation(self, template, loops_path): 53 | 54 | pair_table = pair_table_creation(template) 55 | pair_path = [[loops_path[index] for index in pair] for pair in pair_table] 56 | 57 | return pair_path 58 | 59 | 60 | 61 | def save_outputs(self, file_name, sub_dir, loops, layout, template): 62 | sr = self.sr 63 | cache = self.cache 64 | out_dir = self.out_dir 65 | 66 | 67 | loops_dir = get_save_dir(out_dir, [sub_dir, 'loops']) 68 | loops_path = [os.path.join(loops_dir, f'{file_name}_{index}.wav') for index in loops] 69 | 70 | for ith_loop in loops: 71 | save_audio(loops_path[ith_loop], loops[ith_loop], sr) 72 | 73 | if cache: 74 | layout_path = os.path.join(get_save_dir(out_dir, [sub_dir, 'layout']) , f'{file_name}.npy') 75 | template_path = os.path.join(get_save_dir(out_dir, [sub_dir, 'template']), f'{file_name}.npy') 76 | 77 | save_np(layout_path , layout) 78 | save_np(template_path, template) 79 | 80 | 81 | return [remove_prefix_dir(path , out_dir) for path in loops_path] 82 | 83 | 84 | 85 | 86 | 
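A hedged example of driving the extraction pipeline for a single track (not part of the repo; it assumes a file files/inputs/my_track.wav exists, and 'my_track' and the GPU index are illustrative; the run is slow and requires the loopextractor dependency):

```python
from neural_loop_combiner.dataset.pipeline import Pipeline

track_info = Pipeline('my_track', 'wav', 0).start()
print(track_info['loops_path'])  # refined loop files, relative to OUT_DIR
print(track_info['pairs_path'])  # pairs of loops that co-occur in at least one bar
```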
-------------------------------------------------------------------------------- /neural_loop_combiner/models/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.init as init 4 | 5 | def init_weights(m): 6 | if isinstance(m, nn.Conv2d): 7 | init.xavier_normal_(m.weight.data) 8 | if m.bias is not None: 9 | init.normal_(m.bias.data) 10 | elif isinstance(m, nn.ConvTranspose2d): 11 | init.xavier_normal_(m.weight.data) 12 | if m.bias is not None: 13 | init.normal_(m.bias.data) 14 | elif isinstance(m, nn.BatchNorm1d): 15 | init.normal_(m.weight.data, mean=1, std=0.02) 16 | init.constant_(m.bias.data, 0) 17 | elif isinstance(m, nn.BatchNorm2d): 18 | init.normal_(m.weight.data, mean=1, std=0.02) 19 | init.constant_(m.bias.data, 0) 20 | elif isinstance(m, nn.Linear): 21 | init.xavier_normal_(m.weight.data) 22 | init.normal_(m.bias.data) 23 | 24 | class Skeleton(nn.Module): 25 | def __init__(self): 26 | super(Skeleton, self).__init__() 27 | self.conv = nn.Sequential(nn.Conv2d(1, 16, 3, padding=1), 28 | nn.Dropout(0.1), 29 | nn.BatchNorm2d(16), 30 | nn.PReLU(), 31 | nn.Conv2d(16, 4, 3, padding=1), 32 | nn.Dropout(0.1), 33 | nn.BatchNorm2d(4), 34 | nn.PReLU()) 35 | 36 | self.fc = nn.Sequential(nn.Linear(4 * 128 * 173, 256), 37 | nn.BatchNorm1d(256), 38 | nn.PReLU(), 39 | nn.Linear(256, 128), 40 | nn.BatchNorm1d(128), 41 | nn.PReLU(), 42 | nn.Linear(128, 16)) 43 | 44 | self.apply(init_weights) 45 | 46 | def forward(self, x): 47 | output = self.conv(x) 48 | output = output.view(output.size()[0], -1) 49 | output = self.fc(output) 50 | return output 51 | 52 | 53 | class CNN(nn.Module): 54 | def __init__(self, skeleton): 55 | super(CNN, self).__init__() 56 | self.conv = skeleton.conv 57 | self.fc = nn.Sequential(skeleton.fc, 58 | nn.BatchNorm1d(16), 59 | nn.PReLU(), 60 | nn.Linear(16, 1), 61 | nn.Sigmoid()) 62 | self.apply(init_weights) 63 | 64 | 65 | def forward(self, x): 66 | output = self.conv(x) 67 | output = output.view(output.size()[0], -1) 68 | output = self.fc(output) 69 | return output 70 | 71 | 72 | class SNN(nn.Module): 73 | def __init__(self, skeleton): 74 | super(SNN, self).__init__() 75 | self.conv = skeleton.conv 76 | self.fc = skeleton.fc 77 | self.apply(init_weights) 78 | 79 | 80 | def forward_once(self, x): 81 | output = self.conv(x) 82 | output = output.view(output.size()[0], -1) 83 | output = self.fc(output) 84 | return output 85 | 86 | def forward(self, x1, x2): 87 | output1 = self.forward_once(x1) 88 | output2 = self.forward_once(x2) 89 | return output1, output2 90 | 91 | 92 | -------------------------------------------------------------------------------- /neural_loop_combiner/models/datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import librosa 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | 6 | from torch.utils.data import Dataset 7 | 8 | from neural_loop_combiner.config import settings 9 | from neural_loop_combiner.utils.features import get_melspectrogram 10 | from neural_loop_combiner.utils.utils import data_shuffle 11 | 12 | class MixedDataset(Dataset): 13 | def __init__(self, pos_datas, neg_datas): 14 | self.sr = settings.SR 15 | self.out_dir = settings.OUT_DIR 16 | self.datas_path = data_shuffle(self.attach_label(pos_datas, 1) + self.attach_label(neg_datas, 0)) 17 | 18 | def attach_label(self, datas, label): 19 | return [[data, label] for data in datas] 20 | 21 | def __getitem__(self, index): 22 | 
sr = self.sr 23 | out_dir = self.out_dir 24 | 25 | datas_path, label = self.datas_path[index] 26 | datas_path = [os.path.join(out_dir, data_path) for data_path in datas_path ] 27 | datas_audio = [librosa.load(data_path, sr=sr)[0] for data_path in datas_path] 28 | mixed_audio = datas_audio[0] * 0.5 + datas_audio[1] * 0.5 29 | mixed_spec = get_melspectrogram(mixed_audio, sr) 30 | mixed_spec = mixed_spec.reshape(1, mixed_spec.shape[0], mixed_spec.shape[1]) 31 | 32 | 33 | return (mixed_spec, mixed_audio, datas_audio, datas_path, label) 34 | 35 | 36 | def __len__(self): 37 | return len(self.datas_path) 38 | 39 | class PairDataset(Dataset): 40 | def __init__(self, pos_datas, neg_datas): 41 | 42 | self.sr = settings.SR 43 | self.out_dir = settings.OUT_DIR 44 | self.datas_path = data_shuffle(self.attach_label(pos_datas, 1) + self.attach_label(neg_datas, 0)) 45 | 46 | def attach_label(self, datas, label): 47 | return [[data, label] for data in datas] 48 | 49 | def __getitem__(self, index): 50 | sr = self.sr 51 | out_dir = self.out_dir 52 | 53 | datas_path, label = self.datas_path[index] 54 | datas_path = [os.path.join(out_dir, data_path) for data_path in datas_path] 55 | datas_audio = [librosa.load(data_path, sr=sr)[0] for data_path in datas_path] 56 | datas_spec = [get_melspectrogram(data_audio, sr) for data_audio in datas_audio] 57 | datas_spec = [data_spec.reshape(1, data_spec.shape[0], data_spec.shape[1])for data_spec in datas_spec] 58 | 59 | return (datas_spec, datas_audio, datas_path, label) 60 | 61 | def __len__(self): 62 | return len(self.datas_path) 63 | 64 | 65 | 66 | class SingleDataset(Dataset): 67 | def __init__(self, datas_path): 68 | 69 | self.sr = settings.SR 70 | self.out_dir = settings.OUT_DIR 71 | self.data_paths = data_shuffle(datas_path) 72 | 73 | def __getitem__(self, index): 74 | 75 | sr = self.sr 76 | out_dir = self.out_dir 77 | 78 | data_path = os.path.join(out_dir, self.data_paths[index]) 79 | data_audio = librosa.load(data_path, sr=self.sr)[0] 80 | data_spec = get_melspectrogram(data_audio, self.sr) 81 | data_spec = data_spec.reshape(1, data_spec.shape[0], data_spec.shape[1]) 82 | 83 | return (data_spec, data_audio, data_path) 84 | 85 | 86 | def __len__(self): 87 | return len(self.data_paths) -------------------------------------------------------------------------------- /neural_loop_combiner/utils/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | import filetype 4 | import os, librosa 5 | import numpy as np 6 | import soundfile as sf 7 | from neural_loop_combiner.config import settings 8 | 9 | def save_np(file_path, file): 10 | existed = os.path.exists(file_path) 11 | if not existed: 12 | np.save(file_path, file) 13 | return existed 14 | 15 | def load_np(file_path, cache): 16 | return np.load(file_path, allow_pickle=True) if cache and os.path.exists(file_path) else None 17 | 18 | def save_audio(audio_path, audio, sr): 19 | existed = os.path.exists(audio_path) 20 | sf.write(audio_path, audio, sr) 21 | return existed 22 | 23 | def check_exist(output_path): 24 | existed = os.path.exists(output_path) 25 | if not existed: 26 | os.makedirs(output_path) 27 | return existed 28 | 29 | def check_files_exist(files): 30 | exists = [os.path.exists(file) for file in files] 31 | return sum(exists) == len(files) 32 | 33 | def str_replace(string): 34 | return string.replace('/', '_').replace(' ', '_').replace('__', '_').replace('__', '_') 35 | 36 | def get_save_dir(out_dir, cat_dirs): 37 | for cat_dir in 
cat_dirs: 38 | out_dir = os.path.join(out_dir, cat_dir) 39 | check_exist(out_dir) 40 | return out_dir 41 | 42 | def get_file_name(file_path): 43 | media_type = filetype.guess(file_path).extension 44 | file_name = os.path.split(file_path)[-1].split(f'.{media_type}')[0] 45 | return file_name, media_type 46 | 47 | def remove_prefix_dir(path, first_dir): 48 | return os.path.relpath(path, first_dir) 49 | 50 | 51 | def log_message(message, log_info=[]): 52 | if len(log_info) <= 0: 53 | print(f'{message} ...') 54 | else: 55 | junction = '/' 56 | print(f'[{junction.join([str(log) for log in log_info])}] {message}...') 57 | 58 | def discrete_matrix(matrix, thre): 59 | return matrix > thre 60 | 61 | 62 | def pair_iteration(lst1, lst2): 63 | return [list(lst) for lst in np.unique([np.array(sorted([x, y])) for x in lst1 for y in lst2 if x != y], axis=0)] 64 | 65 | 66 | def pair_table_creation(template): 67 | pair_template = [v for v in template if sum(v) >= 2] 68 | if len(pair_template) != 0: 69 | unique_bar = np.unique(pair_template, axis=0) 70 | pair_bar = [sorted(y[0]) for y in (map(lambda x: np.where(x == True), unique_bar))] 71 | pair_table = np.unique([np.array(y) for x in list(map(lambda bar: pair_iteration(bar, bar), pair_bar)) for y in x], axis=0) 72 | pair_table = [list(x) for x in pair_table] 73 | else: 74 | pair_table = [] 75 | return pair_table 76 | 77 | def data_shuffle(array): 78 | random.shuffle(array) 79 | return array 80 | 81 | def data_exclude(array, excl_array): 82 | return [element for element in array if element not in excl_array] 83 | 84 | def data_include(array, incd_array): 85 | return [element for element in array if element in incd_array] 86 | 87 | def data_deduplicate(array): 88 | deduplicate_array = [] 89 | for element in array: 90 | if element not in deduplicate_array: 91 | deduplicate_array.append(element) 92 | return deduplicate_array 93 | 94 | def save_json(data, file_path): 95 | with open(file_path, 'w') as outfile: 96 | json.dump(data, outfile) 97 | 98 | def load_json(file_path): 99 | with open(file_path, 'r') as file: 100 | data = json.load(file) 101 | return data -------------------------------------------------------------------------------- /neural_loop_combiner/dataset/pipeline/refintor.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings("ignore") 3 | 4 | import os, librosa 5 | import numpy as np 6 | from functools import reduce 7 | from sklearn.preprocessing import normalize 8 | from neural_loop_combiner.config import settings 9 | from neural_loop_combiner.utils.utils import log_message, discrete_matrix, get_save_dir 10 | from neural_loop_combiner.utils.comparison import spec_similarity 11 | from neural_loop_combiner.utils.manipulate import time_stretch 12 | 13 | 14 | class Refintor: 15 | 16 | def __init__(self, extractor, log_info=[]): 17 | 18 | self.sr = settings.SR 19 | self.dur = settings.DUR 20 | self.log = settings.LOG 21 | 22 | self.exist_thre = settings.EXISTED_THRESHOLD 23 | self.dupl_thre = settings.HASH_THRESHOLD 24 | 25 | self.log_info = log_info 26 | self.extractor = extractor 27 | 28 | if self.log: log_message('Refintor started', self.log_info) 29 | self.refinement() 30 | self.template = discrete_matrix(self.layout, self.exist_thre) 31 | if self.log: log_message('Refintor completed', self.log_info) 32 | 33 | def loops_refinement(self, dupl_table): 34 | 35 | sr = self.sr 36 | dur = self.dur 37 | 38 | ext_layout = self.extractor.layout 39 | ext_loops = 
self.extractor.loops
40 | 
41 |         loops  = {}
42 |         layout = np.zeros((ext_layout.shape[0], len(dupl_table.keys())))
43 | 
44 |         for i, max_snr_ith in enumerate(dupl_table):
45 |             sim_iths     = [max_snr_ith] + dupl_table[max_snr_ith]
46 |             actv_val     = list(map(lambda ith: ext_layout[:, ith], sim_iths))
47 |             layout[:, i] = reduce(lambda x, y: x + y, actv_val)
48 |             loops[i]     = time_stretch(ext_loops[max_snr_ith], dur, sr)
49 | 
50 | 
51 |         return loops, normalize(layout, norm='l2')
52 | 
53 |     def refinement(self):
54 | 
55 |         dupl_thre      = self.dupl_thre
56 |         ext_layout     = self.extractor.layout
57 |         ext_loops      = self.extractor.loops
58 |         ext_specs      = self.extractor.specs
59 |         ext_snr_scores = self.extractor.snr_scores
60 | 
61 | 
62 |         dupl_table, used_iths = {}, []
63 | 
64 |         for ith in ext_loops:
65 | 
66 |             ith_loop = ext_loops[ith]
67 |             ith_spec = ext_specs[ith]
68 | 
69 |             if ith not in used_iths:
70 |                 similarity = list(map(lambda tgt_ith: spec_similarity(ith_spec, ext_specs[tgt_ith]), ext_specs))
71 | 
72 |                 dupl_ith   = [i for i, s in enumerate(similarity) if s < dupl_thre and i not in used_iths]
73 |                 used_iths += dupl_ith
74 | 
75 |                 max_snr_score = max(map(lambda i: ext_snr_scores[i], dupl_ith))
76 |                 max_snr_ith   = [i for i in dupl_ith if ext_snr_scores[i] == max_snr_score][0]
77 |                 repl_iths     = list(filter(lambda i: i != max_snr_ith, dupl_ith))
78 | 
79 |                 dupl_table[max_snr_ith] = repl_iths
80 | 
81 |                 if len(used_iths) == len(ext_loops.keys()):
82 |                     break
83 | 
84 |         self.loops, self.layout = self.loops_refinement(dupl_table)
85 | 
86 | 
87 | 
88 | 
89 |     def get_features(self):
90 |         return {
91 |             'loops'   : self.loops,
92 |             'layout'  : self.layout,
93 |             'template': self.template,
94 |         }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Neural Loop Combiner: Neural Network Models For Assessing The Compatibility of Loops
2 | > This repository contains the code for "[Neural Loop Combiner: Neural Network Models For Assessing The Compatibility of Loops](https://arxiv.org/abs/2008.02011)"
3 | > *Proceedings of the 21st International Society for Music Information Retrieval Conference (ISMIR), 2020.*
4 | > Bo-Yu Chen, Jordan B. L. Smith, Yi-Hsuan Yang
5 | 
6 | If you want to hear more audio examples, please check the demo page [here](https://paulyuchen.com/Neural-Loop-Combiner-Demo/)
7 | If you want to play with it in an interactive way, please check [Beats with You](http://paulyuchen.com/beats-with-you/) [[code]](https://github.com/ChenPaulYu/beats-with-you)
8 | 
9 | ## Prerequisites
10 | - python=3.7.8
11 | - torch=1.7.0
12 | - librosa=0.7.2
13 | - download the loopextractor script from [here](https://github.com/jblsmith/loopextractor)
14 | 
15 | ### Installing Required Libraries
16 | ```
17 | git clone https://github.com/jblsmith/loopextractor.git
18 | pip install -r requirements.txt
19 | 
20 | ```
21 | 
22 | ### Run MongoDB
23 | - Install MongoDB locally following the [article](https://docs.mongodb.com/master/administration/install-community/)
24 | - Run mongo to connect to your database, just to make sure it's working. Once you see a mongo prompt, exit with Control-D
25 | - Set the `Database` configuration in `./neural_loop_combiner/config/settings.py`
26 | 
27 | ## Data Preprocessing
28 | - Set the `Directory` and `Others` configuration in `./neural_loop_combiner/config/settings.py`
29 | - Use the `data_preprocess.py` file to preprocess the input data (`INT_DIR` in `./neural_loop_combiner/config/settings.py`)
30 | 
31 | Data preprocessing consists of two main stages:
32 | 1. Load Tracks - Load the tracks to be decomposed from the inputs directory into the database
33 | 2. Data Generation - Decompose each track into individual loops and a layout (arrangement)
34 | 
35 | ```
36 | python data_preprocess.py [--load=(0, 1)] [--extract=(0, 1)] [--gpu_num=0]
37 | 
38 | ```
39 | - `--load`: whether to execute the load_tracks step (1 -> execute, 0 -> skip)
40 | - `--extract`: whether to execute the data_generation step (1 -> execute, 0 -> skip)
41 | - `--gpu_num`: specify which GPU should be used to execute the code
42 | 
43 | ***Note that the second stage takes a fairly long time - more than a day.***
44 | 
45 | ## Create Dataset
46 | - Set the `Datasets` configuration in `./neural_loop_combiner/config/settings.py`
47 | - Set `NG_TYPES` to decide which negative sampling methods should be included in the datasets
48 | - Use the `create_dataset.py` file to create the train/val/test data
49 | 
50 | Create Dataset consists of two main stages:
51 | 1. Loops Tag - Tag each loop's type (harmonic, percussion, bass); only used by the `selected` negative sampling method
52 | 2. Dataset Creation - Run negative sampling based on `./neural_loop_combiner/config/settings.py` and create the positive/negative data
53 | 
54 | 
55 | ```
56 | python create_dataset.py [--tag=(0, 1)]
57 | ```
58 | - `--tag`: whether to execute the loops tag step or import the tags directly from the database (1 -> execute, 0 -> import)
59 | 
60 | ***Note that the first stage takes a fairly long time - more than a day.***
61 | 
62 | 
63 | ## Model Training
64 | - Set the `Models` configuration in `./neural_loop_combiner/config/settings.py`
65 | - Use the `train.py` file to train the models
66 | 
67 | ```
68 | python train.py [--gpu_num=0] [--lr=0.01] [--epochs=20] [--batch_size=128] [--log_interval=10] [--neg_type=(random, selected, shift, rearrange, reverse)] [--model_type=(cnn/snn)]
69 | ```
70 | - `--gpu_num`: specify which GPU should be used to execute the code
71 | - `--lr`: learning rate used to train the model
72 | - `--epochs`: number of epochs used to train the model
73 | - `--batch_size`: batch size used to train the model
74 | - `--log_interval`: how often progress should be logged
75 | - `--neg_type`: specify which negative sampling method the model should be trained with (random, selected, shift, rearrange, reverse)
76 | - `--model_type`: specify which kind of model you want to train (snn/cnn)
77 | 
78 | 
79 | ## Contact
80 | Please feel free to contact [Bo-Yu Chen](http://paulyuchen.com/) if you have any questions.
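
## Sanity Check
A quick way to confirm the MongoDB settings are correct before running any of the scripts above (this snippet is not part of the repo; it only pings the tracks collection):

```python
from neural_loop_combiner.config import settings
from neural_loop_combiner.config.database import initialize_database

col_tracks = initialize_database(settings.MONGODB_TRACK_COL)
print(col_tracks.count_documents({}))  # prints 0 on a fresh database
```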
81 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | _libgcc_mutex=0.1=conda_forge 5 | _openmp_mutex=4.5=1_gnu 6 | _tflow_select=2.3.0=mkl 7 | absl-py=0.10.0=py37hc8dfbb8_1 8 | astor=0.8.1=pyh9f0ad1d_0 9 | audioread=2.1.8=py37hc8dfbb8_3 10 | brotlipy=0.7.0=py37hb5d75c8_1001 11 | bzip2=1.0.8=h516909a_3 12 | c-ares=1.16.1=h516909a_3 13 | ca-certificates=2020.6.20=hecda079_0 14 | cached-property=1.5.1=py_0 15 | certifi=2020.6.20=py37he5f6b98_2 16 | cffi=1.14.3=py37h00ebd2e_1 17 | chardet=3.0.4=py37he5f6b98_1008 18 | cryptography=3.2.1=py37hc72a4ac_0 19 | cycler=0.10.0=py_2 20 | dataclasses=0.6=pypi_0 21 | decorator=4.4.2=py_0 22 | ffmpeg=4.3.1=h3215721_1 23 | ffmpeg-python=0.2.0=py_0 24 | filetype=1.0.7=pypi_0 25 | freetype=2.10.4=h7ca028e_0 26 | future=0.18.2=py37hc8dfbb8_2 27 | gast=0.2.2=py_0 28 | gettext=0.19.8.1=hf34092f_1004 29 | gmp=6.2.0=h58526e2_4 30 | gnutls=3.6.13=h79a8f9a_0 31 | google-pasta=0.2.0=pyh8c360ce_0 32 | grpcio=1.33.2=py37haffed2e_2 33 | h5py=3.1.0=nompi_py37h1e651dc_100 34 | hdf5=1.10.6=nompi_h1022a3e_1110 35 | idna=2.10=pyh9f0ad1d_0 36 | imagehash=4.1.0=pypi_0 37 | imageio=2.9.0=pypi_0 38 | importlib-metadata=2.0.0=py_1 39 | joblib=0.17.0=py_0 40 | jpeg=9d=h36c2ea0_0 41 | keras-applications=1.0.8=py_1 42 | keras-preprocessing=1.1.0=py_0 43 | kiwisolver=1.3.1=py37hc928c03_0 44 | krb5=1.17.1=hfafb76e_3 45 | lame=3.100=h14c3975_1001 46 | lcms2=2.11=hcbb858e_1 47 | ld_impl_linux-64=2.35=h769bd43_9 48 | libblas=3.9.0=2_openblas 49 | libcblas=3.9.0=2_openblas 50 | libcurl=7.71.1=hcdd3856_8 51 | libedit=3.1.20191231=he28a2e2_2 52 | libev=4.33=h516909a_1 53 | libffi=3.2.1=he1b5a44_1007 54 | libflac=1.3.3=he1b5a44_0 55 | libgcc-ng=9.3.0=h5dbcf3e_17 56 | libgfortran-ng=9.3.0=he4bcb1c_17 57 | libgfortran5=9.3.0=he4bcb1c_17 58 | libgomp=9.3.0=h5dbcf3e_17 59 | libiconv=1.16=h516909a_0 60 | liblapack=3.9.0=2_openblas 61 | libllvm8=8.0.1=hc9558a2_0 62 | libnghttp2=1.41.0=h8cfc5f6_2 63 | libogg=1.3.2=h516909a_1002 64 | libopenblas=0.3.12=pthreads_h4812303_1 65 | libpng=1.6.37=h21135ba_2 66 | libprotobuf=3.13.0.1=h8b12597_0 67 | librosa=0.7.2=py_1 68 | libsndfile=1.0.29=he1b5a44_0 69 | libssh2=1.9.0=hab1572f_5 70 | libstdcxx-ng=9.3.0=h2ae2ef3_17 71 | libtiff=4.1.0=h4f3a223_6 72 | libvorbis=1.3.7=he1b5a44_0 73 | libwebp-base=1.1.0=h36c2ea0_3 74 | llvmlite=0.31.0=py37h5202443_1 75 | lz4-c=1.9.2=he1b5a44_3 76 | markdown=3.3.3=pyh9f0ad1d_0 77 | matplotlib-base=3.3.2=py37hc9afd2a_1 78 | mir-eval=0.6=pypi_0 79 | ncurses=6.2=h58526e2_3 80 | nettle=3.4.1=h1bed415_1002 81 | networkx=2.5=pypi_0 82 | norbert=0.2.1=py_0 83 | numba=0.48.0=py37hb3f55d8_0 84 | numpy=1.19.4=py37h7e9df27_0 85 | olefile=0.46=pyh9f0ad1d_1 86 | openh264=2.1.1=h8b12597_0 87 | openssl=1.1.1h=h516909a_0 88 | opt-einsum=3.3.0=pypi_0 89 | packaging=20.4=pypi_0 90 | pandas=0.25.1=py37hb3f55d8_0 91 | pillow=8.0.1=py37h63a5d19_0 92 | pip=20.2.4=py_0 93 | pooch=1.2.0=pypi_0 94 | protobuf=3.13.0.1=py37h3340039_1 95 | pycparser=2.20=pyh9f0ad1d_2 96 | pymongo=3.11.0=pypi_0 97 | pyopenssl=19.1.0=py_1 98 | pyparsing=2.4.7=pyh9f0ad1d_0 99 | pyrubberband=0.3.0=pypi_0 100 | pysndfx=0.3.6=pypi_0 101 | pysocks=1.7.1=py37he5f6b98_2 102 | pysoundfile=0.9.0.post1=pypi_0 103 | python=3.7.8=h6f2ec95_1_cpython 104 | python-dateutil=2.8.1=py_0 105 | python_abi=3.7=1_cp37m 106 | 
pytz=2020.4=pyhd8ed1ab_0
107 | pywavelets=1.1.1=pypi_0
108 | readline=8.0=he28a2e2_2
109 | requests=2.24.0=pyh9f0ad1d_0
110 | resampy=0.2.2=py_0
111 | scikit-image=0.17.2=pypi_0
112 | scikit-learn=0.23.2=py37hddcf8d6_2
113 | scipy=1.5.3=py37h14a347d_0
114 | setuptools=49.6.0=py37he5f6b98_2
115 | six=1.15.0=pyh9f0ad1d_0
116 | spleeter=1.5.3=py37hc8dfbb8_2
117 | sqlite=3.33.0=h4cf870e_1
118 | tensorboard=1.15.0=py37_0
119 | tensorflow=1.15.2=pypi_0
120 | tensorflow-estimator=1.15.1=pyh2649769_0
121 | termcolor=1.1.0=py_2
122 | threadpoolctl=2.1.0=pyh5ca1d4c_0
123 | tifffile=2020.10.1=pypi_0
124 | tk=8.6.10=hed695b0_1
125 | torch=1.7.0=pypi_0
126 | tornado=6.1=py37h4abf009_0
127 | typing-extensions=3.7.4.3=pypi_0
128 | urllib3=1.25.11=py_0
129 | werkzeug=0.16.1=py_0
130 | wheel=0.35.1=pyh9f0ad1d_0
131 | wrapt=1.12.1=py37h8f50634_1
132 | x264=1!152.20180806=h14c3975_0
133 | xz=5.2.5=h516909a_1
134 | zipp=3.4.0=py_0
135 | zlib=1.2.11=h516909a_1010
136 | zstd=1.4.5=h6597ccf_2
137 | 
--------------------------------------------------------------------------------
/data_preprocess.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings("ignore")
3 | 
4 | import argparse, os
5 | import numpy as np
6 | 
7 | from neural_loop_combiner.config import settings
8 | from neural_loop_combiner.utils.utils import log_message, get_file_name
9 | from neural_loop_combiner.config.database import initialize_database
10 | from neural_loop_combiner.dataset.pipeline import Pipeline
11 | 
12 | 
13 | 
14 | 
15 | def load_tracks(col_tracks):
16 | 
17 |     int_dir   = settings.INT_DIR
18 |     int_files = [int_file for int_file in os.listdir(int_dir) if int_file != '.ipynb_checkpoints']
19 | 
20 |     save_count, exist_count, total = 0, 0, len(int_files)
21 |     log_message('Load tracks start')
22 | 
23 |     for int_file in int_files:
24 |         int_path = os.path.join(int_dir, int_file)
25 |         file_name, media_type = get_file_name(int_path)
26 |         find_item = col_tracks.find_one({'file_name': file_name})
27 | 
28 |         if find_item:
29 |             exist_count += 1
30 |             log_message('Exist', [save_count, exist_count, total])
31 |         else:
32 |             save_count += 1
33 |             col_tracks.save({
34 |                 'extracted' : False,
35 |                 'file_name' : file_name,
36 |                 'media_type': media_type,
37 |             })
38 |             log_message('Save', [save_count, exist_count, total])
39 | 
40 |     log_message('Load tracks completed')
41 | 
42 | 
43 | def data_generation(col_tracks, col_loops):
44 | 
45 |     failed_count = 0
46 |     break_loop   = False
47 |     count        = failed_count
48 | 
49 | 
50 | 
51 |     while 1:
52 |         try:
53 |             tracks = col_tracks.find({'extracted': False})
54 |             total  = tracks.count()
55 |             count  = failed_count
56 | 
57 |             if total == 0 or count + 1 == total:
58 |                 print('Finished.....')
59 |                 break
60 | 
61 |             for track in tracks[count:]:
62 | 
63 |                 track_info = track.copy()
64 |                 file_name  = track['file_name']
65 |                 media_type = track['media_type']
66 |                 log_info   = [failed_count, count, total]
67 |                 track_loop = Pipeline(file_name, media_type, gpu_num, log_info)
68 |                 track_loop_info = track_loop.start()
69 | 
70 |                 track_info['extracted'] = True
71 |                 col_tracks.save(track_info)
72 | 
73 |                 find_item = col_loops.find_one({'file_name': file_name})
74 |                 if find_item:
75 |                     track_loop_info['_id'] = find_item['_id']  # keep the existing _id so the record is updated in place
76 |                 col_loops.save(track_loop_info)
77 | 
78 |                 count += 1
79 |                 log_message('Save Success', log_info)
80 | 
81 |         except Exception as e:
82 |             if type(e).__name__ == 'CursorNotFound':
83 |                 log_message('Restart')
84 |             else:
85 |                 log_message(f'Save Failed: {e}')
86 |                 failed_count += 1
87 | 
88 | 
89 | 
90 | if __name__ == '__main__':
91 | 
92 |     parser = argparse.ArgumentParser(description='Data Generation Pipeline')
93 |     parser.add_argument('--load'   , help='load tracks'  , type=int, default=1)
94 |     parser.add_argument('--extract', help='extract loops', type=int, default=1)
95 |     parser.add_argument('--gpu_num', help='gpu num'      , type=int, default=0)
96 | 
97 |     gpu_num = parser.parse_args().gpu_num
98 |     load    = parser.parse_args().load
99 |     extract = parser.parse_args().extract
100 | 
101 |     # Loading Tracks to MongoDB
102 | 
103 |     col_tracks = initialize_database(settings.MONGODB_TRACK_COL)
104 |     if load == 1: load_tracks(col_tracks)
105 | 
106 |     # Execute Loops Extraction
107 | 
108 |     col_loops = initialize_database(settings.MONGODB_LOOP_COL)
109 |     if extract == 1: data_generation(col_tracks, col_loops)
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings("ignore")
3 | 
4 | import os
5 | import pymongo
6 | import torch
7 | import librosa
8 | import argparse
9 | import datetime
10 | import numpy as np
11 | 
12 | 
13 | import torch.nn as nn
14 | import torch.optim as optim
15 | import torch.utils.data as Data
16 | from torch.optim import lr_scheduler
17 | 
18 | from neural_loop_combiner.config import settings
19 | from neural_loop_combiner.utils.utils import log_message, load_json
20 | from neural_loop_combiner.config.database import initialize_database
21 | from neural_loop_combiner.models.datasets import MixedDataset, PairDataset, SingleDataset
22 | from neural_loop_combiner.trainer.trainer import Trainer
23 | from neural_loop_combiner.models.models import Skeleton, CNN, SNN
24 | from neural_loop_combiner.models.losses import ContrastiveLoss
25 | 
26 | 
27 | def load_dataset(out_dir, data_path):
28 |     return load_json(os.path.join(out_dir, data_path))
29 | 
30 | 
31 | def load_dataloader(model_type, data_type, neg_type, datasets):
32 |     pos_datas = datasets['pos'][data_type]
33 |     neg_datas = datasets['neg'][data_type][neg_type]
34 |     torch_datasets = MixedDataset(pos_datas, neg_datas) if model_type == 'cnn' else PairDataset(pos_datas, neg_datas)
35 |     return Data.DataLoader(dataset=torch_datasets, batch_size=batch_size, shuffle=(data_type == 'train'), num_workers=1)  # shuffle only the training split
36 | 
37 | def load_parameters(model_type, lr, batch_size, gpu_num):
38 |     model     = CNN(Skeleton()) if model_type == 'cnn' else SNN(Skeleton())
39 |     loss_fn   = nn.BCELoss() if model_type == 'cnn' else ContrastiveLoss()
40 |     device    = torch.device("cuda:{}".format(gpu_num))
41 |     optimizer = optim.Adam(model.parameters(), lr=lr)
42 |     scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)
43 | 
44 |     return model, loss_fn, optimizer, scheduler, device
45 | 
46 | 
47 | 
48 | 
49 | if __name__ == '__main__':
50 | 
51 |     parser = argparse.ArgumentParser(description='Train')
52 |     parser.add_argument('--gpu_num'     , help='gpu num'     , type=int  , default=2)
53 |     parser.add_argument('--lr'          , help='lr'          , type=float, default=settings.LR)
54 |     parser.add_argument('--epochs'      , help='epochs'      , type=int  , default=settings.EPOCHS)
55 |     parser.add_argument('--batch_size'  , help='batch size'  , type=int  , default=settings.BATCH_SIZE)
56 |     parser.add_argument('--log_interval', help='log interval', type=int  , default=settings.LOG_INTERVAL)
57 | 
58 |     parser.add_argument('--neg_type'    ,
help='neg type' , default='random') 59 | parser.add_argument('--model_type' , help='model type' , default='cnn') 60 | 61 | 62 | col_datasets = initialize_database(settings.MONGODB_DATASET_COL) 63 | col_models = initialize_database(settings.MONGODB_MODEL_COL) 64 | datas = col_datasets.find({}).sort('date',pymongo.DESCENDING)[0] 65 | datasets = load_dataset(settings.OUT_DIR, datas['data_path']) 66 | 67 | gpu_num = parser.parse_args().gpu_num 68 | log_interval = parser.parse_args().log_interval 69 | neg_type = parser.parse_args().neg_type 70 | 71 | if neg_type not in datas['neg_types']: 72 | log_message(f'{neg_type} not exists') 73 | else: 74 | model_type = 'snn' if parser.parse_args().model_type != 'cnn' else parser.parse_args().model_type 75 | lr = parser.parse_args().lr 76 | epochs = parser.parse_args().epochs 77 | batch_size = parser.parse_args().batch_size 78 | model, loss_fn, optimizer, scheduler, device = load_parameters(model_type, lr, batch_size, gpu_num) 79 | 80 | train_loader = load_dataloader(model_type, 'train', neg_type, datasets) 81 | val_loader = load_dataloader(model_type, 'val' , neg_type, datasets) 82 | 83 | trainer = Trainer(model, train_loader, val_loader, loss_fn, optimizer, scheduler, epochs, device, log_interval) 84 | log_message(f'Start train {model_type} with {neg_type}') 85 | 86 | losses_avg, losses_history = trainer.fit() 87 | model_id, model_name = trainer.save_model() 88 | 89 | col_models.save({ 90 | 'model_id' : model_id, 91 | 'model_name': model_name, 92 | 'dataset_id': datas['date'], 93 | 'neg_type' : neg_type, 94 | 'model_type': model_type, 95 | 'parameters': { 96 | 'lr': lr, 97 | 'epochs': epochs, 98 | 'batch_size': batch_size 99 | }, 100 | 'losses': { 101 | 'avg' : losses_avg, 102 | 'history': losses_history 103 | } 104 | }) 105 | 106 | log_message(f'Finish train {model_type} with {neg_type}') 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /neural_loop_combiner/dataset/dataset.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings("ignore") 3 | 4 | import os 5 | import math 6 | import random 7 | import librosa 8 | import datetime 9 | import numpy as np 10 | 11 | from neural_loop_combiner.config import settings 12 | from neural_loop_combiner.utils.utils import log_message, data_shuffle, data_deduplicate 13 | from neural_loop_combiner.utils.utils import save_json, get_save_dir, remove_prefix_dir 14 | from neural_loop_combiner.dataset.dataSampler import DataSampler 15 | 16 | class Dataset: 17 | 18 | def __init__(self, tracks_dict, harm_datas, log_info=[]): 19 | 20 | self.sr = settings.SR 21 | self.dur = settings.DUR 22 | self.log = settings.LOG 23 | self.out_dir = settings.OUT_DIR 24 | self.test_size = settings.TEST_SIZE 25 | self.ng_types = [neg_type for neg_type in settings.NG_TYPES.keys() if settings.NG_TYPES[neg_type] == 1] 26 | self.split_ratio = settings.SPLIT_RATIO 27 | self.harm_datas = harm_datas 28 | self.tracks_dict = tracks_dict 29 | self.tracks_key = self.data_split() 30 | 31 | self.pos_datas = self.pos_datas_retrieve() 32 | self.idv_datas = self.idv_datas_retrieve() 33 | self.neg_datas = self.neg_datas_retrieve() 34 | 35 | def idv_datas_retrieve(self): 36 | pos_datas = self.pos_datas 37 | return {data_type: data_deduplicate([idv_loop for pair in pos_datas[data_type] for idv_loop in pair]) for data_type in pos_datas} 38 | 39 | def data_split(self): 40 | test_size = self.test_size 41 | 
        test_size   = self.test_size
        tracks_dict = self.tracks_dict
        split_ratio = self.split_ratio
        tracks_key  = data_shuffle(list(tracks_dict.keys()))

        test_tracks_key  = [track_key for track_key in tracks_key if len(tracks_dict[track_key]['pairs_path']) == 1][:test_size]
        other_tracks_key = [track_key for track_key in tracks_key if track_key not in test_tracks_key]

        # cumulative pair counts; range(1, len + 1) so the final entry is the full total
        pairs_num     = [len(tracks_dict[key]['pairs_path']) for key in other_tracks_key]
        pairs_acc     = [sum(pairs_num[0:i]) for i in range(1, len(pairs_num) + 1)]
        val_pairs_num = math.floor(pairs_acc[-1] * (1 - split_ratio))
        split_index   = next(i for i, acc in enumerate(pairs_acc) if acc > val_pairs_num)

        val_tracks_key   = other_tracks_key[0:split_index]
        train_tracks_key = other_tracks_key[split_index:]

        return {
            'val'  : val_tracks_key,
            'test' : test_tracks_key,
            'train': train_tracks_key,
        }

    def pos_datas_retrieve(self):
        log         = self.log
        tracks_dict = self.tracks_dict
        tracks_key  = self.tracks_key
        pos_datas   = {}

        for data_type in tracks_key:
            # use a distinct loop variable instead of shadowing tracks_key
            tmp = [tracks_dict[track_key]['pairs_path'] for track_key in tracks_key[data_type]]
            pos_datas[data_type] = [pair for tracks in tmp for pair in tracks]
        if log: log_message('Positive retrieval completed')
        return pos_datas


    def neg_datas_retrieve(self):
        log         = self.log
        harm_datas  = self.harm_datas
        tracks_dict = self.tracks_dict
        tracks_key  = self.tracks_key
        idv_datas   = self.idv_datas
        neg_datas   = {}

        for data_type in tracks_key:
            # no negative samples are drawn for the test split
            if data_type != 'test':
                neg_datas[data_type] = DataSampler(tracks_key[data_type], tracks_dict, idv_datas[data_type], harm_datas, data_type).sampling()
        if log: log_message('Negative retrieval completed')
        return neg_datas


    def datas_retrieve(self):
        out_dir   = self.out_dir
        ng_types  = self.ng_types
        pos_datas = self.pos_datas
        neg_datas = self.neg_datas
        idv_datas = self.idv_datas

        date        = datetime.datetime.utcnow()
        loops_count = {data_type: len(idv_datas[data_type]) for data_type in idv_datas}
        pairs_count = {data_type: len(pos_datas[data_type]) for data_type in pos_datas}

        loops_count['total'] = sum(loops_count.values())
        pairs_count['total'] = sum(pairs_count.values())

        datas = {
            'pos': pos_datas,
            'neg': neg_datas,
            'idv': idv_datas,
        }

        data_dir  = get_save_dir(out_dir, ['datasets'])
        data_path = os.path.join(data_dir, f'{date}.json')
        save_json(datas, data_path)

        return {
            'date'       : date,
            'data_path'  : remove_prefix_dir(data_path, out_dir),
            'neg_types'  : ng_types,
            'loops_count': loops_count,
            'pairs_count': pairs_count
        }


--------------------------------------------------------------------------------
/neural_loop_combiner/trainer/trainer.py:
--------------------------------------------------------------------------------
import warnings
warnings.filterwarnings("ignore")

import os
import torch
import datetime
import numpy as np

from neural_loop_combiner.config import settings
from neural_loop_combiner.utils.utils import get_save_dir, log_message
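
# Trainer bundles the model, loss function, optimizer, scheduler and the two
# dataloaders. fit() runs one training pass and one validation pass per epoch
# and returns the mean loss per split alongside the full per-epoch history.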
class Trainer:

    def __init__(self, model, train_loader, val_loader, loss_fn, optimizer, scheduler, epochs, device, log_interval):

        self.sr      = settings.SR
        self.log     = settings.LOG
        self.out_dir = settings.OUT_DIR

        self.model        = model.to(device)
        self.loss_fn      = loss_fn
        self.epochs       = epochs
        self.device       = device
        self.optimizer    = optimizer
        self.scheduler    = scheduler
        self.train_loader = train_loader
        self.val_loader   = val_loader
        self.log_interval = log_interval

    def fit(self):
        epochs    = self.epochs
        scheduler = self.scheduler

        losses_history = {'train': [], 'val': []}
        for epoch in range(epochs):
            train_loss = self.train()
            val_loss   = self.validate()
            scheduler.step()  # step after the optimizer updates, as PyTorch >= 1.1 expects
            log_message(f'Train Loss: {train_loss}, Val Loss: {val_loss}', [epoch + 1, epochs])

            losses_history['val'].append(val_loss)
            losses_history['train'].append(train_loss)

        losses_avg = {data_type: np.mean(np.array(losses_history[data_type])) for data_type in losses_history}
        return losses_avg, losses_history

    def train(self):
        log          = self.log
        device       = self.device
        model        = self.model
        loss_fn      = self.loss_fn
        optimizer    = self.optimizer
        train_loader = self.train_loader
        log_interval = self.log_interval

        model.train()
        train_loss = 0

        for batch_idx, (*datas, targets) in enumerate(train_loader):
            inputs, *others = datas
            if not isinstance(inputs, (tuple, list)):
                inputs = (inputs,)

            if device:
                inputs = tuple(i.to(device) for i in inputs)
                if targets is not None:
                    targets = targets.to(device)
            if targets is not None:
                # .float() instead of torch.tensor(tensor, ...), which warns and breaks on None
                targets = targets.float()

            optimizer.zero_grad()
            outputs = model(*inputs)

            if not isinstance(outputs, (tuple, list)):
                outputs = (outputs,)

            loss_inputs = outputs
            if targets is not None:
                loss_inputs += (targets,)

            loss = loss_fn(*loss_inputs)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()
            # '&' binds tighter than '==', so the original condition compared against (0 & log)
            if batch_idx % log_interval == 0 and log:
                log_message(f'Loss: {loss.item()}', [batch_idx, len(train_loader)])

        return train_loss / (batch_idx + 1)

    def validate(self):
        device     = self.device
        model      = self.model
        loss_fn    = self.loss_fn
        val_loader = self.val_loader
        val_loss   = 0

        model.eval()
        with torch.no_grad():
            for batch_idx, (*datas, targets) in enumerate(val_loader):
                inputs, *others = datas
                if not isinstance(inputs, (tuple, list)):
                    inputs = (inputs,)

                if device:
                    inputs = tuple(i.to(device) for i in inputs)
                    if targets is not None:
                        targets = targets.to(device)
                if targets is not None:
                    targets = targets.float()

                outputs = model(*inputs)
                if not isinstance(outputs, (tuple, list)):
                    outputs = (outputs,)

                loss_inputs = outputs
                if targets is not None:
                    loss_inputs += (targets,)

                loss = loss_fn(*loss_inputs)
                val_loss += loss.item()
        return val_loss / (batch_idx + 1)

    def save_model(self):

        log     = self.log
        model   = self.model
        out_dir = self.out_dir

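        # The UTC timestamp doubles as the model id: it names the .pkl checkpoint
        # here and is stored as model_id in the MongoDB record written by train.py.
        # Only the state_dict is saved, so loading requires rebuilding the module.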
        model_dir  = get_save_dir(out_dir, ['models'])
        model_id   = datetime.datetime.utcnow()
        model_name = f'{model_id}.pkl'
        model_path = os.path.join(model_dir, model_name)

        torch.save(model.state_dict(), model_path)
        if log: log_message(f'Saved model {model_name}')

        return model_id, model_name


--------------------------------------------------------------------------------
/neural_loop_combiner/dataset/pipeline/extractor.py:
--------------------------------------------------------------------------------
import warnings
warnings.filterwarnings("ignore")

import os, librosa
import numpy as np
import tensorly as tl

from loopextractor.loopextractor.loopextractor import get_downbeats, make_spectral_cube, validate_template_sizes
from loopextractor.loopextractor.loopextractor import get_loop_signal, create_loop_spectrum, choose_bar_to_reconstruct

from neural_loop_combiner.utils.utils import get_save_dir, save_np, load_np, log_message, discrete_matrix
from neural_loop_combiner.utils.comparison import snr_cal
from neural_loop_combiner.config import settings

tl.set_backend('pytorch')

class Extractor:

    def __init__(self, file_name, media_type, gpu_num, log_info=None):

        self.sr         = settings.SR
        self.cache      = settings.CACHE
        self.log        = settings.LOG
        self.int_dir    = settings.INT_DIR
        self.out_dir    = settings.OUT_DIR
        self.exist_thre = settings.EXISTED_THRESHOLD

        self.log_info    = log_info if log_info is not None else []
        self.file_name   = file_name
        self.media_type  = media_type
        self.gpu_num     = gpu_num
        self.file_path   = os.path.join(self.int_dir, f'{file_name}.{media_type}')
        self.track_audio = librosa.load(self.file_path, sr=self.sr)[0]

        if self.log: log_message('Extractor started', self.log_info)
        self.decompose()
        if self.log: log_message('Decomposition completed', self.log_info)
        self.layout_retrieve()
        self.template = discrete_matrix(self.layout, self.exist_thre)
        if self.log: log_message('Layout & template retrieval completed', self.log_info)
        self.loop_extract()
        if self.log: log_message('Extractor completed', self.log_info)

    def decompose(self):
        sr          = self.sr
        out_dir     = self.out_dir
        cache       = self.cache
        log         = self.log
        log_info    = self.log_info
        gpu_num     = self.gpu_num
        file_name   = self.file_name
        track_audio = self.track_audio

        downbeat_path = os.path.join(get_save_dir(out_dir, ['extracted', 'downbeat']), f'{file_name}.npy')
        core_path     = os.path.join(get_save_dir(out_dir, ['extracted', 'core'])    , f'{file_name}.npy')
        factors_path  = os.path.join(get_save_dir(out_dir, ['extracted', 'factors']) , f'{file_name}.npy')

        # load_np returns None on a cache miss
        downbeat_times = load_np(downbeat_path, cache)
        core           = load_np(core_path    , cache)
        factors        = load_np(factors_path , cache)

        downbeat_times  = downbeat_times if downbeat_times is not None else get_downbeats(track_audio)
        downbeat_frames = librosa.time_to_samples(downbeat_times, sr=sr)

        if log: log_message('Downbeat completed', log_info)
        spectral_cube = make_spectral_cube(track_audio, downbeat_frames)
        n_sounds, n_rhythms, n_loops = validate_template_sizes(spectral_cube, n_templates=[0, 0, 0])
        if core is None or factors is None:
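            # Non-negative Tucker decomposition, run on the GPU through the
            # PyTorch backend: the magnitude spectral cube
            # (frequency x intra-bar time x bars) is factorized into sound,
            # rhythm and loop templates, and factors[2] (the loop activations
            # over bars) later becomes the track layout.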
            core, factors = tl.decomposition.non_negative_tucker(tl.tensor(np.abs(spectral_cube), device=f'cuda:{gpu_num}'),
                                                                 [n_sounds, n_rhythms, n_loops], n_iter_max=500, verbose=True)
            core    = np.array(core.detach().to('cpu').numpy())
            factors = [np.array(factor.detach().to('cpu').numpy()) for factor in factors]

        if cache:
            save_np(downbeat_path, downbeat_times)
            save_np(core_path    , core)
            save_np(factors_path , factors)

        self.n_loops         = n_loops
        self.spectral_cube   = spectral_cube
        self.core            = core
        self.factors         = factors
        self.downbeat_frames = downbeat_frames

    def get_ith_loop_info(self, index):
        return self.bar_audios[index], self.loops[index], self.snr_scores[index]

    def layout_retrieve(self):
        # the third Tucker factor activates each loop template across bars
        self.layout = self.factors[2]

    def loop_reconstruct(self, ith_loop):
        n_loops         = self.n_loops
        core            = self.core
        factors         = self.factors
        track_audio     = self.track_audio
        spectral_cube   = self.spectral_cube
        downbeat_frames = self.downbeat_frames

        if ith_loop >= n_loops:
            # match the four-value signature of the successful path below
            return None, None, None, None
        loop_spec       = create_loop_spectrum(factors[0], factors[1], core[:, :, ith_loop])
        bar_index       = choose_bar_to_reconstruct(factors[2], ith_loop)
        ith_loop_signal = get_loop_signal(loop_spec, spectral_cube[:, :, bar_index])
        bar_audio       = track_audio[downbeat_frames[bar_index]: downbeat_frames[bar_index + 1]]
        snr_score       = snr_cal(bar_audio, ith_loop_signal)

        return bar_audio, ith_loop_signal, loop_spec, snr_score

    def loop_extract(self):
        n_loops = self.n_loops

        bar_audios, loops, specs, snr_scores = {}, {}, {}, {}

        for ith_loop in range(n_loops):
            bar_audio, loop, spec, snr_score = self.loop_reconstruct(ith_loop)
            loops[ith_loop]      = loop
            specs[ith_loop]      = spec
            bar_audios[ith_loop] = bar_audio
            snr_scores[ith_loop] = snr_score

        self.bar_audios = bar_audios
        self.loops      = loops
        self.specs      = specs
        self.snr_scores = snr_scores

    def get_features(self):
        return {
            'layout'     : self.layout,
            'loops'      : self.loops,
            'specs'      : self.specs,
            'template'   : self.template,
            'snr_scores' : self.snr_scores,
            'track_audio': self.track_audio,
        }

--------------------------------------------------------------------------------
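
Taken together, these files form the tail of the pipeline: Extractor factorizes a
track into loop templates, Dataset splits the harvested loop pairs into
train/val/test, and Trainer fits the chosen model. A minimal sketch of the call
order follows; 'some_track' is a hypothetical file under settings.INT_DIR, and
tracks_dict / harm_datas are assumed to come from the sampler and tagger stages
of the repository, so this is an illustration of the interfaces rather than a
script that runs on its own.

from neural_loop_combiner.dataset.pipeline.extractor import Extractor
from neural_loop_combiner.dataset.dataset import Dataset

# 1. decompose one track (file name and GPU index are placeholders)
extractor = Extractor('some_track', 'wav', gpu_num=0)
features  = extractor.get_features()   # layout, loops, specs, template, snr_scores

# 2. with tracks_dict / harm_datas built by the earlier pipeline stages,
#    split the data and serialize it; the returned record is the metadata
#    that gets stored in MongoDB and read back by train.py
dataset = Dataset(tracks_dict, harm_datas)
record  = dataset.datas_retrieve()     # writes <date>.json under OUT_DIR/datasets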