├── .gitignore
├── README.md
├── app.py
├── compute_songs.py
├── draw_neural_net.py
├── fma.py
├── load_csv.py
├── nn_digaram.png
├── requirements.txt
├── run_docker.sh
├── solution
│   ├── app.py
│   └── plot.py
└── subject.pdf

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
fma_small.zip
fma_metadata.zip
fma_small
fma_metadata
data
data.zip
__pycache__
.ipynb_checkpoints
computed
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Workshop Neural Network - Identify the genre of a song :computer:

## Table of Contents

1. [Introduction](#introduction-dart)
2. [Requirements](#requirements-books)
3. [Workshop Content](#workshop-content-bulb)
4. [Sources](#sources-notebook)

## Introduction :dart:

Artificial intelligence is one of the most exciting topics in computer science today. The following workshop session aims to give you a general understanding of how neural networks work, their benefits and limitations, and how to implement them in your own A.I. projects.
This workshop lets you discover neural networks through the task of identifying the genre of a song.

## Requirements :books:

The workshop runs inside a Python container to ease the setup. You must have Docker :whale: installed in your environment.

Once you reach the 2nd step, you will need to download [data.zip](https://drive.google.com/drive/folders/1Qy9P7WEWRzHVr1rd9Nj4_QFQFWZNwsSG), put it at the root of the project and extract it (**manually**).
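
For example, from the project root (a sketch assuming a standard `unzip` on the host, and that the archive unpacks into the `data/` folder the scripts expect):

```bash
> unzip data.zip
```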

Then execute the following commands:

```bash
> sudo docker run -v $PWD:/mnt -w /mnt -it miseyu/docker-ubuntu16-python3.6:latest
$> apt-get update && apt-get install -y ffmpeg
$> pip install --upgrade pip
$> python3.6 -m pip install -r requirements.txt
```

If you exit the container, you can restart it with the following commands:
```bash
> docker start [container_id]
> docker exec -it -w /mnt [container_id] bash
```

## Workshop Content :bulb:

The workshop is divided into 3 parts:

- **Part 1**: Introduction to neural networks and the basics of machine learning
- **Part 2**: Developing a neural network
- **Part 3**: Testing and upgrading the neural network

**You will find the content of all these parts in the PDF [here](https://github.com/Mitix-EPI/Workshop-Neural-Network/blob/main/subject.pdf)**

## Sources :notebook:

Data from [here](https://github.com/mdeff/fma)

How to extract the data, from [here](https://github.com/crowdAI/crowdai-musical-genre-recognition-starter-kit)

A guide that helped us: [link](https://navdeepsinghh.medium.com/identifying-the-genre-of-a-song-with-neural-networks-851db89c42f0)

Another good project: [link](https://towardsdatascience.com/using-cnns-and-rnns-for-music-genre-recognition-2435fb2ed6af)
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
import os
import random
import time

import numpy as np
import pandas as pd

import fma

labels = pd.read_csv('./data/train_labels.csv', index_col=0)
nb_genres = 0
nb_features = 10000  # Variable that you can change
nb_music_by_genre = 5  # Variable that you can change

# function to get all the tracks of a genre listed in the labels variable
def get_genre_songs(genre, limits=1000):
    global labels
    paths = []
    tmp = labels.loc[labels['genre'] == genre]
    indexes = tmp.index.values
    for i in indexes:
        path = fma.get_audio_path(i)
        if os.path.exists(path):
            paths.append(path)
    if len(paths) < limits:
        return paths
    return random.choices(paths, k=limits)

def time_convert(sec):
    mins = sec // 60
    sec = sec % 60
    hours = mins // 60
    mins = mins % 60
    print("\nTime Lapsed = {0}:{1}:{2}\n".format(int(hours), int(mins), int(sec)))

def get_features_song(f):
    global nb_features
    try:
        # each song is stored as a CSV of flattened MFCC values; keep the first nb_features of them
        features = np.genfromtxt(f, delimiter=',')[:nb_features]
        if len(features) == nb_features:
            return features
        return []
    except (OSError, ValueError):
        return []

def display_details_compute(genres, arr_nb_songs_by_genre):
    for i in range(len(genres)):
        print("{} songs in {} genre".format(arr_nb_songs_by_genre[i], genres[i]))

def generate_features_and_labels(nb_music_by_genre):
    global nb_genres
    all_features = []
    all_labels = []

    GENRES = ['Electronic', 'Experimental', 'Folk', 'Hip-Hop',
              'Instrumental', 'International', 'Pop', 'Rock']

    perc_index = 0
    perc_total = nb_music_by_genre * len(GENRES)

    arr_nb_songs_by_genre = []
    start_time = time.time()  # Calc time to compute
    for genre in GENRES:
        songs_computed = 0
        sound_files = get_genre_songs(genre, limits=nb_music_by_genre)
        print('Processing %d songs in %s genre...' % (len(sound_files), genre))
        if sound_files:
            nb_genres += 1
        for f in sound_files:
            if not os.path.isfile(f):
                continue
            perc_index += 1
            print("\t-> Processing ", f, "... [", "{:.2f}".format(perc_index * 100 / perc_total), "%]")
[", "{:.2f}".format(perc_index * 100 / perc_total), "%]") 74 | features = get_features_song(f) 75 | if len(features): 76 | all_features.append(features) 77 | all_labels.append(genre) 78 | songs_computed += 1 79 | arr_nb_songs_by_genre.append(songs_computed) 80 | # convert labels to one-hot encoding 81 | label_uniq_ids, label_row_ids = np.unique(all_labels, return_inverse=True) 82 | label_row_ids = label_row_ids.astype(np.int32, copy=False) 83 | # onehot_labels = to_categorical(label_row_ids, len(label_uniq_ids)) 84 | onehot_labels = np.eye(len(label_uniq_ids))[label_row_ids] 85 | end_time = time.time() 86 | time_lapsed = end_time - start_time 87 | time_convert(time_lapsed) # Show time to compute 88 | print(min([np.shape(i) for i in all_features])) 89 | print(display_details_compute(GENRES, arr_nb_songs_by_genre)) 90 | return np.stack(all_features), onehot_labels 91 | 92 | features, labels = generate_features_and_labels(nb_music_by_genre) 93 | 94 | print("np.shape(features): ", np.shape(features)) 95 | print("np.shape(labels): ", np.shape(labels)) 96 | 97 | training_split = 0.8 98 | 99 | # last column has genre, turn it into unique ids 100 | alldata = np.column_stack((features, labels)) 101 | 102 | np.random.shuffle(alldata) 103 | splitidx = int(len(alldata) * training_split) 104 | train, test = alldata[:splitidx,:], alldata[splitidx:,:] 105 | 106 | print("np.shape(train): ", np.shape(train)) 107 | print("np.shape(test): ", np.shape(test)) 108 | 109 | print("nb_genres: ", nb_genres) 110 | 111 | X_train = train[:,:-nb_genres] 112 | Y_train = train[:,-nb_genres:].astype(int) 113 | 114 | X_test = test[:,:-nb_genres] 115 | Y_test = test[:,-nb_genres:].astype(int) 116 | 117 | print("np.shape(X_train): ", np.shape(X_train)) 118 | print("np.shape(Y_train): ", np.shape(Y_train)) 119 | 120 | from sklearn.neural_network import MLPClassifier 121 | 122 | start_time = time.time() # Calc time to compute 123 | 124 | nb_hidden_layer_sizes = (15,) 125 | 126 | print("\nCreating model...") 127 | """ 128 | Creating Model 129 | """ 130 | 131 | clf = None # TODO 132 | 133 | """ 134 | End Creating Model 135 | """ 136 | 137 | print("Training the NN...") 138 | 139 | """ 140 | Training NN 141 | """ 142 | 143 | # TODO 144 | 145 | """ 146 | End Training NN 147 | """ 148 | 149 | end_time = time.time() 150 | time_lapsed = end_time - start_time 151 | time_convert(time_lapsed) # Show time to compute 152 | 153 | print("Testing Neural Network...") 154 | """ 155 | Testing Neural Network with X_test 156 | """ 157 | 158 | predict_test = None # TODO 159 | 160 | """ 161 | End Testing Neural Network 162 | """ 163 | 164 | from sklearn.metrics import classification_report,confusion_matrix 165 | 166 | print(confusion_matrix(Y_test.argmax(axis=1),predict_test.argmax(axis=1))) 167 | print(classification_report(Y_test.argmax(axis=1),predict_test.argmax(axis=1))) 168 | 169 | ans = input("Do you want to generate the NN graph [Y/N] ? ") 170 | if (ans == "Y" or ans == "y" or ans == ""): 171 | import matplotlib.pyplot as plt 172 | from draw_neural_net import draw_neural_net 173 | 174 | print("Generating Neural Network Graph ...") 175 | fig = plt.figure(figsize=(12, 12)) 176 | ax = fig.gca() 177 | ax.axis('off') 178 | 179 | layer_sizes = [int(nb_features / 1000)] + list(nb_hidden_layer_sizes) + [nb_genres] 180 | draw_neural_net(ax, .1, .9, .1, .9, layer_sizes, clf.coefs_, clf.intercepts_, clf.n_iter_, clf.loss_) 181 | fig.savefig('nn_digaram.png') 182 | 183 | # How to upgrade the NN ? 184 | # -> Create a cross-validation set. 
--------------------------------------------------------------------------------
/compute_songs.py:
--------------------------------------------------------------------------------
import os
import random

import librosa
import numpy as np
import pandas as pd
# from tensorflow.keras.utils import to_categorical

import fma

labels = pd.read_csv('./data/train_labels.csv', index_col=0)

# sanity check: print where one track is expected to live
path = fma.get_audio_path(1042)
print(path)

def get_genre_songs(genre, limits=1000):
    global labels
    paths = []
    tmp = labels.loc[labels['genre'] == genre]
    indexes = tmp.index.values
    for i in indexes:
        path = fma.get_audio_path(i)
        if os.path.exists(path):
            paths.append(path)
    if len(paths) < limits:
        return paths
    return random.choices(paths, k=limits)

def extract_features_song(f):
    y, _ = librosa.load(f)

    # get Mel-frequency cepstral coefficients
    mfcc = librosa.feature.mfcc(y=y)
    # normalize values between -1 and 1 (divide by max)
    mfcc /= np.amax(np.absolute(mfcc))
    return np.ndarray.flatten(mfcc)

nb_genres = 0
all_features = []
all_labels = []

GENRES = ['Electronic', 'Experimental', 'Folk', 'Hip-Hop',
          'Instrumental', 'International', 'Pop', 'Rock']
for genre in GENRES:
    sound_files = get_genre_songs(genre, limits=1000)
    print('Processing %d songs in %s genre...' % (len(sound_files), genre))
    if sound_files:
        nb_genres += 1
    for f in sound_files:
        print("\t-> Processing %s..." % f)
        # mirror the source layout under data/ and skip songs already computed
        out_path = "data" + f[4:-4] + ".csv"
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        if os.path.isfile(out_path):
            continue
        try:
            features = extract_features_song(f)
        except Exception:
            continue
        np.savetxt(out_path, features, delimiter=",")
        all_features.append(features)
        all_labels.append(genre)
--------------------------------------------------------------------------------
/draw_neural_net.py:
--------------------------------------------------------------------------------
## Gist originally developed by @craffel and improved by @ljhuang2017

import matplotlib.pyplot as plt
import numpy as np

def draw_neural_net(ax, left, right, bottom, top, layer_sizes, coefs_, intercepts_, n_iter_, loss_):
    '''
    Draw a neural network cartoon using matplotlib.

    :usage:
        >>> fig = plt.figure(figsize=(12, 12))
        >>> draw_neural_net(fig.gca(), .1, .9, .1, .9, [4, 7, 2],
        ...                 clf.coefs_, clf.intercepts_, clf.n_iter_, clf.loss_)

    :parameters:
        - ax : matplotlib.axes.AxesSubplot
            The axes on which to plot the cartoon (get it e.g. by plt.gca())
        - left : float
            The center of the leftmost node(s) will be placed here
        - right : float
            The center of the rightmost node(s) will be placed here
        - bottom : float
            The center of the bottommost node(s) will be placed here
        - top : float
            The center of the topmost node(s) will be placed here
        - layer_sizes : list of int
            List of layer sizes, including input and output dimensionality
        - coefs_ / intercepts_ : lists of arrays
            Weights and biases, taken from a fitted MLPClassifier
        - n_iter_ : int
            Number of iterations the solver ran for
        - loss_ : float
            Final training loss, printed under the diagram
    '''
    n_layers = len(layer_sizes)
    v_spacing = (top - bottom)/float(max(layer_sizes))
    h_spacing = (right - left)/float(len(layer_sizes) - 1)

    # Input-Arrows
    layer_top_0 = v_spacing*(layer_sizes[0] - 1)/2. + (top + bottom)/2.
    for m in range(layer_sizes[0]):
        plt.arrow(left-0.18, layer_top_0 - m*v_spacing, 0.12, 0, lw=1, head_width=0.01, head_length=0.02)

    # Nodes
    for n, layer_size in enumerate(layer_sizes):
        layer_top = v_spacing*(layer_size - 1)/2. + (top + bottom)/2.
        for m in range(layer_size):
            circle = plt.Circle((n*h_spacing + left, layer_top - m*v_spacing), v_spacing/8.,
                                color='w', ec='k', zorder=4)
            if n == 0:
                plt.text(left-0.125, layer_top - m*v_spacing, r'$X_{'+str(m+1)+'}$', fontsize=15)
            elif (n_layers == 3) and (n == 1):
                plt.text(n*h_spacing + left+0.00, layer_top - m*v_spacing + (v_spacing/8.+0.01*v_spacing), r'$H_{'+str(m+1)+'}$', fontsize=15)
            elif n == n_layers - 1:
                plt.text(n*h_spacing + left+0.10, layer_top - m*v_spacing, r'$y_{'+str(m+1)+'}$', fontsize=15)
            ax.add_artist(circle)
    # Bias-Nodes
    for n, layer_size in enumerate(layer_sizes):
        if n < n_layers - 1:
            x_bias = (n+0.5)*h_spacing + left
            y_bias = top + 0.005
            circle = plt.Circle((x_bias, y_bias), v_spacing/8., color='w', ec='k', zorder=4)
            plt.text(x_bias-(v_spacing/8.+0.10*v_spacing+0.01), y_bias, r'$1$', fontsize=15)
            ax.add_artist(circle)
    # Edges
    # Edges between nodes
    for n, (layer_size_a, layer_size_b) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        layer_top_a = v_spacing*(layer_size_a - 1)/2. + (top + bottom)/2.
        layer_top_b = v_spacing*(layer_size_b - 1)/2. + (top + bottom)/2.
        for m in range(layer_size_a):
            for o in range(layer_size_b):
                line = plt.Line2D([n*h_spacing + left, (n + 1)*h_spacing + left],
                                  [layer_top_a - m*v_spacing, layer_top_b - o*v_spacing], c='k')
                ax.add_artist(line)
                xm = (n*h_spacing + left)
                xo = ((n + 1)*h_spacing + left)
                ym = (layer_top_a - m*v_spacing)
                yo = (layer_top_b - o*v_spacing)
                rot_mo_rad = np.arctan((yo-ym)/(xo-xm))
                rot_mo_deg = rot_mo_rad*180./np.pi
                xm1 = xm + (v_spacing/8.+0.05)*np.cos(rot_mo_rad)
                if n == 0:
                    if yo > ym:
                        ym1 = ym + (v_spacing/8.+0.12)*np.sin(rot_mo_rad)
                    else:
                        ym1 = ym + (v_spacing/8.+0.05)*np.sin(rot_mo_rad)
                else:
                    if yo > ym:
                        ym1 = ym + (v_spacing/8.+0.12)*np.sin(rot_mo_rad)
                    else:
                        ym1 = ym + (v_spacing/8.+0.04)*np.sin(rot_mo_rad)
                plt.text(xm1, ym1,
                         str(round(coefs_[n][m, o], 4)),
                         rotation=rot_mo_deg,
                         fontsize=10)
    # Edges between bias and nodes
    for n, (layer_size_a, layer_size_b) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        if n < n_layers-1:
            layer_top_a = v_spacing*(layer_size_a - 1)/2. + (top + bottom)/2.
            layer_top_b = v_spacing*(layer_size_b - 1)/2. + (top + bottom)/2.
            x_bias = (n+0.5)*h_spacing + left
            y_bias = top + 0.005
            for o in range(layer_size_b):
                line = plt.Line2D([x_bias, (n + 1)*h_spacing + left],
                                  [y_bias, layer_top_b - o*v_spacing], c='k')
                ax.add_artist(line)
                xo = ((n + 1)*h_spacing + left)
                yo = (layer_top_b - o*v_spacing)
                rot_bo_rad = np.arctan((yo-y_bias)/(xo-x_bias))
                rot_bo_deg = rot_bo_rad*180./np.pi
                xo2 = xo - (v_spacing/8.+0.01)*np.cos(rot_bo_rad)
                yo2 = yo - (v_spacing/8.+0.01)*np.sin(rot_bo_rad)
                xo1 = xo2 - 0.05*np.cos(rot_bo_rad)
                yo1 = yo2 - 0.05*np.sin(rot_bo_rad)
                plt.text(xo1, yo1,
                         str(round(intercepts_[n][o], 4)),
                         rotation=rot_bo_deg,
                         fontsize=10)

    # Output-Arrows
    layer_top_0 = v_spacing*(layer_sizes[-1] - 1)/2. + (top + bottom)/2.
    for m in range(layer_sizes[-1]):
        plt.arrow(right+0.015, layer_top_0 - m*v_spacing, 0.16*h_spacing, 0, lw=1, head_width=0.01, head_length=0.02)
    # Record the n_iter_ and the loss
    plt.text(left + (right-left)/3., bottom - 0.005*v_spacing,
             'Steps: ' + str(n_iter_) + '  Loss: ' + str(round(loss_, 6)), fontsize=15)
--------------------------------------------------------------------------------
/fma.py:
--------------------------------------------------------------------------------
"""Adapted from https://github.com/mdeff/fma/blob/master/utils.py"""

import os

def get_audio_path(track_id, solution=False):
    # tracks live in data/fma_small/<first 3 digits of id>/<6-digit id>.csv
    tid_str = '{:06d}'.format(track_id)
    if not solution:
        return os.path.join('data', 'fma_small', tid_str[:3], tid_str + '.csv')
    return os.path.join('../data', 'fma_small', tid_str[:3], tid_str + '.csv')
--------------------------------------------------------------------------------
/load_csv.py:
--------------------------------------------------------------------------------
import os
import numpy as np

def read_directory(directory):
    files = []
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            files.append(directory + filename)
        if os.path.isdir(directory + filename):
            files += read_directory(directory + filename + "/")
    return files

for f in read_directory("data/"):
    print(np.genfromtxt(f, delimiter=','))
--------------------------------------------------------------------------------
/nn_digaram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mitix-EPI/Workshop-Neural-Network/dc730bc79e734cb761110c8a3935678ac0dea387/nn_digaram.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
appdirs==1.4.3
audioread==2.1.5
certifi==2017.11.5
chardet==3.0.4
click==6.7
crowdai==1.0.14
decorator==4.1.2
enum-compat==0.0.2
enum34==1.1.6
Flask==0.12.2
idna==2.6
itsdangerous==0.24
Jinja2==2.9.6
joblib==0.11
llvmlite==0.20.0
lxml==4.1.1
MarkupSafe==1.0
numba==0.35.0
numpy==1.13.3
packaging==16.8
pandas==0.21.0
pyparsing==2.2.0
python-dateutil==2.6.1
pytrends==4.3.0
pytz==2017.3
requests==2.14.2
resampy==0.2.0
scikit-learn==0.19.1
scipy==1.0.0
six==1.10.0
socketIO-client-2==0.7.5
termcolor==1.1.0
tqdm==4.11.2
urllib3==1.22
websocket-client==0.40.0
Werkzeug==0.12.2
ffmpeg
scikit-neuralnetwork
matplotlib
librosa
--------------------------------------------------------------------------------
/run_docker.sh:
--------------------------------------------------------------------------------
#!/bin/bash

sudo docker run -v $PWD:/mnt -w /mnt -it miseyu/docker-ubuntu16-python3.6:latest

# If you quit the container, you can run it again by typing:
# docker start [container_id]
# docker exec -it -w /mnt [container_id] bash

# don't forget to stop the container by typing:
# docker stop [container_id]

# Commands to run in the container:

# apt-get update && apt-get install -y ffmpeg
# pip install --upgrade pip
# python3.6 -m pip install -r requirements.txt
--------------------------------------------------------------------------------
/solution/app.py:
--------------------------------------------------------------------------------
import os
import random
import time
import sys
import inspect

import numpy as np
import pandas as pd

# make the project root importable so that fma.py and draw_neural_net.py resolve
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
import fma

labels = pd.read_csv('../data/train_labels.csv', index_col=0)
nb_genres = 0
nb_features = 10000  # Variable that you can change
nb_music_by_genre = 5  # Variable that you can change

# function to get all the tracks of a genre listed in the labels variable
def get_genre_songs(genre, limits=1000):
    global labels
    paths = []
    tmp = labels.loc[labels['genre'] == genre]
    indexes = tmp.index.values
    for i in indexes:
        path = fma.get_audio_path(i, solution=True)
        if os.path.exists(path):
            paths.append(path)
    if len(paths) < limits:
        return paths
    return random.choices(paths, k=limits)

def time_convert(sec):
    mins = sec // 60
    sec = sec % 60
    hours = mins // 60
    mins = mins % 60
    print("\nTime Lapsed = {0}:{1}:{2}\n".format(int(hours), int(mins), int(sec)))

def get_features_song(f):
    global nb_features
    try:
        features = np.genfromtxt(f, delimiter=',')[:nb_features]
        if len(features) == nb_features:
            return features
        return []
    except (OSError, ValueError):
        return []

def display_details_compute(genres, arr_nb_songs_by_genre):
    for i in range(len(genres)):
        print("{} songs in {} genre".format(arr_nb_songs_by_genre[i], genres[i]))

def generate_features_and_labels(nb_music_by_genre):
    global nb_genres
    all_features = []
    all_labels = []

    GENRES = ['Electronic', 'Experimental', 'Folk', 'Hip-Hop',
              'Instrumental', 'International', 'Pop', 'Rock']
    arr_nb_songs_by_genre = []
    start_time = time.time()  # Calc time to compute
    for genre in GENRES:
        songs_computed = 0
        sound_files = get_genre_songs(genre, limits=nb_music_by_genre)
        print('Processing %d songs in %s genre...' % (len(sound_files), genre))
        if sound_files:
            nb_genres += 1
        for f in sound_files:
            if not os.path.isfile(f):
                continue
            print("\t-> Processing %s ..." % f)
            features = get_features_song(f)
            if len(features):
                all_features.append(features)
                all_labels.append(genre)
                songs_computed += 1
        arr_nb_songs_by_genre.append(songs_computed)
    # convert labels to one-hot encoding
    label_uniq_ids, label_row_ids = np.unique(all_labels, return_inverse=True)
    label_row_ids = label_row_ids.astype(np.int32, copy=False)
    # onehot_labels = to_categorical(label_row_ids, len(label_uniq_ids))
    onehot_labels = np.eye(len(label_uniq_ids))[label_row_ids]
    end_time = time.time()
    time_lapsed = end_time - start_time
    time_convert(time_lapsed)  # Show time to compute
    print(min([np.shape(i) for i in all_features]))  # shape of the shortest feature vector
    display_details_compute(GENRES, arr_nb_songs_by_genre)
    return np.stack(all_features), onehot_labels

features, labels = generate_features_and_labels(nb_music_by_genre)

print("np.shape(features): ", np.shape(features))
print("np.shape(labels): ", np.shape(labels))

training_split = 0.8

# the last nb_genres columns hold the one-hot labels; stacking them with the
# features lets us shuffle both together
alldata = np.column_stack((features, labels))

np.random.shuffle(alldata)
splitidx = int(len(alldata) * training_split)
train, test = alldata[:splitidx,:], alldata[splitidx:,:]

print("np.shape(train): ", np.shape(train))
print("np.shape(test): ", np.shape(test))

print("nb_genres: ", nb_genres)

X_train = train[:,:-nb_genres]
Y_train = train[:,-nb_genres:].astype(int)

X_test = test[:,:-nb_genres]
Y_test = test[:,-nb_genres:].astype(int)

print("np.shape(X_train): ", np.shape(X_train))
print("np.shape(Y_train): ", np.shape(Y_train))

from sklearn.neural_network import MLPClassifier

start_time = time.time()  # Calc time to compute

nb_hidden_layer_sizes = (15,)

print("\nCreating model...")
# NB: learning_rate_init only matters for the sgd/adam solvers; it is kept here
# so you can swap the solver and experiment
clf = MLPClassifier(solver='lbfgs', alpha=1e-10, hidden_layer_sizes=nb_hidden_layer_sizes,
                    activation='logistic', random_state=5, max_iter=15000, learning_rate_init=0.1)
print("Training the model...")
clf.fit(X_train, Y_train)

end_time = time.time()
time_lapsed = end_time - start_time
time_convert(time_lapsed)  # Show time to compute

print("Testing Neural Network...")
predict_test = clf.predict(X_test)

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(Y_test.argmax(axis=1), predict_test.argmax(axis=1)))
print(classification_report(Y_test.argmax(axis=1), predict_test.argmax(axis=1)))

ans = input("Do you want to generate the NN graph [Y/N] ? ")
if ans in ("Y", "y", ""):
    import matplotlib.pyplot as plt
    from draw_neural_net import draw_neural_net

    print("Generating Neural Network Graph ...")
    fig = plt.figure(figsize=(12, 12))
    ax = fig.gca()
    ax.axis('off')

    # the input layer is drawn scaled down (nb_features / 1000) to keep the diagram readable
    layer_sizes = [int(nb_features / 1000)] + list(nb_hidden_layer_sizes) + [nb_genres]
    draw_neural_net(ax, .1, .9, .1, .9, layer_sizes, clf.coefs_, clf.intercepts_, clf.n_iter_, clf.loss_)
    fig.savefig('nn_digaram.png')

# How to upgrade the NN ?
# -> PCA to reduce the number of features and compute faster (see the sketch below).
# -> Create a cross-validation set.
# -> Plot learning curves. (https://scikit-learn.org/stable/auto_examples/model_selection/plot_learning_curve.html)
# -> Establish if there is high bias or high variance.
# -> Tune hyperparameters.
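
# A minimal sketch of the PCA idea above (an optional extra, not part of the
# original solution); it assumes the reduction is applied to the raw feature
# matrix before the train/test split:
#
# from sklearn.decomposition import PCA
# pca = PCA(n_components=100)  # keep the 100 strongest directions of variance
# features = pca.fit_transform(features)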
--------------------------------------------------------------------------------
/solution/plot.py:
--------------------------------------------------------------------------------
import warnings
import time
import os
import random
import sys
import inspect

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.exceptions import ConvergenceWarning

# make the project root importable so that fma.py resolves
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
import fma

genre_labels = pd.read_csv('../data/train_labels.csv', index_col=0)
nb_genres = 0
nb_features = 10000  # Variable that you can change
nb_music_by_genre = 5  # Variable that you can change

# eight hidden-layer configurations to compare; the other hyperparameters are shared
base_params = {
    "solver": "sgd",
    "alpha": 1e-10,
    "activation": 'logistic',
    "random_state": 5,
    "max_iter": 15000,
    "learning_rate_init": 0.1,
}
hidden_layer_configs = [(5,), (10,), (15,), (20,), (5, 2), (10, 2), (15, 2), (20, 2)]
params = [dict(base_params, hidden_layer_sizes=sizes) for sizes in hidden_layer_configs]

labels = [
    "1 layer, 5 neurons, logistic",
    "1 layer, 10 neurons, logistic",
    "1 layer, 15 neurons, logistic",
    "1 layer, 20 neurons, logistic",
    "2 layers, 5 neurons, logistic",
    "2 layers, 10 neurons, logistic",
    "2 layers, 15 neurons, logistic",
    "2 layers, 20 neurons, logistic",
]

plot_args = [
    {"c": "red", "linestyle": "-"},
    {"c": "green", "linestyle": "-"},
    {"c": "blue", "linestyle": "-"},
    {"c": "red", "linestyle": "--"},
    {"c": "green", "linestyle": "--"},
    {"c": "blue", "linestyle": "--"},
    {"c": "black", "linestyle": "-"},
    {"c": "black", "linestyle": "--"},  # added so that all eight curves get a style
]


def plot_on_dataset(X, y):
    # for each hidden-layer configuration, train an MLP and record its loss curve
    X = MinMaxScaler().fit_transform(X)
    mlps = []

    for label, param in zip(labels, params):
        print("training: %s" % label)
        mlp = MLPClassifier(**param)

        # some parameter combinations will not converge, as can be seen on the
        # plots, so those warnings are ignored here
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore", category=ConvergenceWarning, module="sklearn"
            )
            mlp.fit(X, y)

        mlps.append(mlp)
        print("Training set score: %f" % mlp.score(X, y))
        print("Training set loss: %f" % mlp.loss_)
    for mlp, label, args in zip(mlps, labels, plot_args):
        plt.plot(mlp.loss_curve_, label=label, **args)

fig = plt.figure(figsize=(10, 10))

def get_features_song(f):
    global nb_features
    try:
        features = np.genfromtxt(f, delimiter=',')[:nb_features]
        if len(features) == nb_features:
            return features
        return []
    except (OSError, ValueError):
        return []

# function to get all the tracks of a genre listed in the genre_labels variable
def get_genre_songs(genre, limits=1000):
    global genre_labels
    paths = []
    tmp = genre_labels.loc[genre_labels['genre'] == genre]
    indexes = tmp.index.values
    for i in indexes:
        path = fma.get_audio_path(i, solution=True)
        if os.path.exists(path):
            paths.append(path)
    if len(paths) < limits:
        return paths
    return random.choices(paths, k=limits)

def time_convert(sec):
    mins = sec // 60
    sec = sec % 60
    hours = mins // 60
    mins = mins % 60
    print("\nTime Lapsed = {0}:{1}:{2}\n".format(int(hours), int(mins), int(sec)))

def display_details_compute(genres, arr_nb_songs_by_genre):
    for i in range(len(genres)):
        print("{} songs in {} genre".format(arr_nb_songs_by_genre[i], genres[i]))

def generate_features_and_labels(nb_music_by_genre):
    global nb_genres
    all_features = []
    all_labels = []

    GENRES = ['Electronic', 'Experimental', 'Folk', 'Hip-Hop',
              'Instrumental', 'International', 'Pop', 'Rock']
    arr_nb_songs_by_genre = []
    start_time = time.time()  # Calc time to compute
    for genre in GENRES:
        songs_computed = 0
        sound_files = get_genre_songs(genre, limits=nb_music_by_genre)
        print('Processing %d songs in %s genre...' % (len(sound_files), genre))
        if sound_files:
            nb_genres += 1
        for f in sound_files:
            if not os.path.isfile(f):
                continue
            print("\t-> Processing %s ..." % f)
            features = get_features_song(f)
            if len(features):
                all_features.append(features)
                all_labels.append(genre)
                songs_computed += 1
        arr_nb_songs_by_genre.append(songs_computed)
    # convert labels to one-hot encoding
    label_uniq_ids, label_row_ids = np.unique(all_labels, return_inverse=True)
    label_row_ids = label_row_ids.astype(np.int32, copy=False)
    # onehot_labels = to_categorical(label_row_ids, len(label_uniq_ids))
    onehot_labels = np.eye(len(label_uniq_ids))[label_row_ids]
    end_time = time.time()
    time_lapsed = end_time - start_time
    time_convert(time_lapsed)  # Show time to compute
    # print(min([np.shape(i) for i in all_features]))
    display_details_compute(GENRES, arr_nb_songs_by_genre)
    return np.stack(all_features), onehot_labels

features, genre_labels = generate_features_and_labels(nb_music_by_genre)

training_split = 0.8

alldata = np.column_stack((features, genre_labels))

np.random.shuffle(alldata)
splitidx = int(len(alldata) * training_split)
train, test = alldata[:splitidx,:], alldata[splitidx:,:]

train_input = train[:,:-nb_genres]
train_labels = train[:,-nb_genres:].astype(int)

test_input = test[:,:-nb_genres]
test_labels = test[:,-nb_genres:].astype(int)

plot_on_dataset(train_input, train_labels)

fig.legend(labels, ncol=3, loc="upper center")
plt.savefig('plot.png')
plt.show()
--------------------------------------------------------------------------------
/subject.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mitix-EPI/Workshop-Neural-Network/dc730bc79e734cb761110c8a3935678ac0dea387/subject.pdf
--------------------------------------------------------------------------------