├── .ipynb_checkpoints
│   └── urbanclap_audio_classification-checkpoint.ipynb
├── Exploring the UrbanClap Data.ipynb
├── README.md
├── urbanclap_audio_classification.ipynb
└── urbanclap_audio_classification.py

/README.md:
--------------------------------------------------------------------------------
# Audio-Classification-using-Deep-Learning
Classifying 10 different categories of Urban Sounds using Deep Learning.

The audio files can be downloaded from the following link:
https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU


## IMPORTANT: The folders should be arranged in the following manner:
Dir of train labels: sounds/labels/train.csv

Dir of test labels: sounds/labels/test.csv

Dir of train sounds: sounds/train/train_sound/ (audio files in .wav format)

Dir of test sounds: sounds/test/test_sound/ (audio files in .wav format)


### The train folder is labelled
### The test folder is not labelled

To load an audio signal into a machine-understandable format, we sample it: we simply record the signal's value at fixed time steps, splitting one continuous signal into a handful of discrete values. For example, in a 2-second audio file we might extract a value every half second.
![Alt Text](https://s3-ap-south-1.amazonaws.com/av-blog-media/wp-content/uploads/2017/08/23210623/sound.png)
This is called sampling of audio data, and the rate at which it is sampled is called the sampling rate.

The signal can also be viewed in the frequency domain, where a mixture of different pure signals is represented by one unique value per tone (for example, three pure signals become three unique values).

There are a few more ways in which audio data can be represented, for example using MFCCs (Mel-Frequency Cepstral Coefficients). These are simply different ways of representing the same data.

Next, we extract features from these audio representations so that our Deep Learning model can work on those features and perform the task it is designed for.
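As a rough sketch of the two ideas above (sampling and MFCC features) using librosa — the file path and the number of coefficients are illustrative assumptions, not values fixed by this project:

```python
import librosa
import numpy as np

# Load a clip; librosa resamples to 22050 Hz by default, so `signal` holds
# one amplitude value per sample and `sr` is the sampling rate.
signal, sr = librosa.load('sounds/train/train_sound/1.wav')   # path is illustrative
print(signal.shape, sr)

# The same clip as MFCCs (Mel-Frequency Cepstral Coefficients); averaging over
# time gives one fixed-length feature vector per clip for the model to learn from.
mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40)
feature_vector = np.mean(mfccs.T, axis=0)   # shape: (40,)
```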
--------------------------------------------------------------------------------
/urbanclap_audio_classification.py:
--------------------------------------------------------------------------------

# coding: utf-8

# In[61]:


# importing necessary libraries and dependencies
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import Adam
# train_test_split lives in sklearn.model_selection (sklearn.cross_validation was removed)
from sklearn.model_selection import train_test_split
from sklearn import metrics


# In[2]:


train_data_dir = 'sounds/train/train_sound/'
test_data_dir = 'sounds/test/test_sound/'

# reading the labels
train = pd.read_csv('sounds/labels/train.csv')
test = pd.read_csv('sounds/labels/test.csv')


# In[3]:


# function to load a file and extract its features
def parser(row, data_dir):
    # setting the path of the audio file for this row
    file_name = os.path.join(data_dir, str(row.ID) + '.wav')
    print(file_name)
    # check if the file is corrupted
    try:
        # kaiser_fast is a resampling technique used for faster extraction
        # X -> audio time-series data; sample_rate -> sampling rate
        X, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

        # extracting Mel-Frequency Cepstral Coefficients from the data
        # y -> time-series audio data; sr -> sampling rate
        # n_mfcc -> number of MFCCs to return
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)

    except Exception as e:
        print("Error encountered while parsing file: ", e)
        return [None, None]

    # store the mfcc features
    feature = mfccs
    # store the respective id
    data_id = row.ID

    return [data_id, feature]


# ### Reading train.csv and storing into temp

# In[4]:


# parsing train; result_type='expand' turns the returned [ID, feature] lists
# into a two-column DataFrame instead of a Series of lists
temp = train.apply(parser, axis=1, data_dir=train_data_dir, result_type='expand')
temp.columns = ['ID', 'feature']


# In[5]:


# adding Class to 'temp'
temp['Class'] = train['Class']


# In[6]:


type(temp)


# ### Reading test.csv and storing into temp_test

# In[7]:


# parsing test
temp_test = test.apply(parser, axis=1, data_dir=test_data_dir, result_type='expand')
temp_test.columns = ['ID', 'feature']


# In[16]:


# with result_type='expand' above, temp_test is already a DataFrame with
# 'ID' and 'feature' columns, so no manual unpacking is needed here
type(temp_test)


# In[32]:


print("\n---------------------train data---------------------")
print(type(temp))
print(temp.head())

print("\n---------------------test data---------------------")
print(type(temp_test))
print(temp_test.head())


print('---------------------Checking for NONE values---------------------')
# checking for rows where parsing failed or the label is missing
print(temp[temp.feature.isnull() | temp.Class.isnull()])

# removing such rows from temp and temp_test
temp = temp[temp.feature.notnull() & temp.Class.notnull()]
temp_test = temp_test[temp_test.feature.notnull()]
#print(temp.ID[temp.label.isnull()])
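# In[ ]:


# (Illustrative sanity check, not part of the original notebook.)
# Every surviving row should now carry a 40-dimensional MFCC vector,
# matching the input_shape=(40,) used by the model further below.
print(temp.feature.iloc[0].shape)            # expected: (40,)
print(temp.shape[0], temp_test.shape[0])     # number of parsed train / test clips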
# In[37]:


temp.Class.unique()


# In[38]:


temp.Class.nunique()


# In[35]:


# Label Encoding the audio data
lb = LabelEncoder()

# converting pd.Series into np.array for faster processing
X = np.array(temp.feature.tolist())
y = np.array(temp.Class.tolist())


y = to_categorical(lb.fit_transform(y))


# In[62]:


x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)


# ## Building a deep learning model

# In[73]:


num_labels = y.shape[1]
filter_size = 2

def categorical_classifier():
    model = Sequential()

    # input and first hidden layer
    model.add(Dense(input_shape=(40,), units=256, activation='relu', kernel_initializer='uniform'))
    model.add(Dropout(0.5))

    # second hidden layer
    model.add(Dense(units=256, activation='relu', kernel_initializer='uniform'))
    model.add(Dropout(0.5))

    # output layer
    model.add(Dense(units=num_labels, activation='softmax'))

    # compiling our model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

    # training the data
    #model.fit(X,y, batch_size=32, epochs=500, validation_split=0.3)
    return model


# In[77]:


# building the model and training it on the extracted MFCC features
model = categorical_classifier()
model.fit(x_train, y_train, batch_size=32, epochs=650, validation_data=(x_test, y_test))

--------------------------------------------------------------------------------
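The script above stops after training. As a possible next step — a sketch only, not part of the original code, which assumes the `model`, `lb`, and `temp_test` objects defined in the script — the trained network can be used to label the test clips:

```python
import numpy as np
import pandas as pd

# Features of the unlabelled test clips, in the same 40-dimensional MFCC format as training.
X_unlabelled = np.array(temp_test.feature.tolist())

# Predict class probabilities, keep the most likely class per clip,
# then map the integer labels back to the original class names.
probs = model.predict(X_unlabelled)
predicted_classes = lb.inverse_transform(np.argmax(probs, axis=1))

# Pair every test ID with its predicted class.
predictions = pd.DataFrame({'ID': temp_test.ID.values, 'Class': predicted_classes})
print(predictions.head())
```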