├── Model Testing-L2.ipynb ├── Model Training and Testing.ipynb ├── MonophoneCambridgeBig-LIBROSA.ipynb ├── Phone Folders for L2.ipynb ├── PhoneLevel_AutoEncoder_MFCC_TrainCamb_TestCamb.ipynb ├── PhoneLevel_AutoEncoder_TrainLibri_TestLibri.ipynb ├── PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2.ipynb ├── PhoneLevel_CAutoencoder_MFCC_TrainL2_TestL2.ipynb ├── PhoneLevel_ML11_MFCC_TrainCamb_TestCamb.ipynb ├── PhoneLevel_ML11_MFCC_TrainCamb_TestUK.ipynb ├── PhoneLevel_ML11_MFCC_TrainL2_TestL2.ipynb ├── PhoneLevel_OCSVM_LogfBank_TrainCamb_TestCamb.ipynb ├── PhoneLevel_OCSVM_MFCC_TrainCamb_TestCamb.ipynb ├── PhoneLevel_OCSVM_MFCC_TrainCamb_TestL2.ipynb ├── PhoneLevel_OCSVM_MFCC_TrainL2_TestL2.ipynb ├── PhoneLevel_Pyod_LogfBank_TrainCamb_TestCamb.ipynb ├── PhoneLevel_RL_MFCC_TrainBigCamb_TestL2.ipynb ├── PhoneLevel_RL_MFCC_TrainCamb_TestL2.ipynb ├── PhoneLevel_RL_MFCC_TrainL2_TestL2.ipynb ├── PhoneLevel_SVM_MFCC_TrainL2_TestL2.ipynb ├── README.md ├── Train Val Test and Train scalings for Cambridge Big - LIBROSA.ipynb ├── WordLevel_RL_MP_MFCC_TrainCamb_TestL2.ipynb ├── WordLevel_RL_MP_MFCC_TrainL2_TestL2.ipynb └── download.png /MonophoneCambridgeBig-LIBROSA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "'D:\\\\Thesis Work\\\\InitialDemoWork'" 12 | ] 13 | }, 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "pwd" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import os\n", 30 | "import tgt\n", 31 | "import IPython.display as ipd\n", 32 | "import librosa\n", 33 | "import numpy as np\n", 34 | "import pandas as pd\n", 35 | "from tqdm import tqdm_notebook" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | 
"execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "application/vnd.jupyter.widget-view+json": { 46 | "model_id": "fbab6baa45d741f18439071e8d243466", 47 | "version_major": 2, 48 | "version_minor": 0 49 | }, 50 | "text/plain": [ 51 | "HBox(children=(IntProgress(value=0, max=29925), HTML(value='')))" 52 | ] 53 | }, 54 | "metadata": {}, 55 | "output_type": "display_data" 56 | }, 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "SAMPLE_RATE = 44100\n", 67 | "MAX_NFRAME = 25578 #to make (50,13) all phones safe in 99.7%\n", 68 | "mfccs = []\n", 69 | "phones = []\n", 70 | "n_frames = []\n", 71 | "#timeSeriesList = []\n", 72 | "\n", 73 | "\n", 74 | "files = [file[:-4] for file in os.listdir('./selectedToBeWavs/') if '.wav' in file]\n", 75 | "\n", 76 | "for file in tqdm_notebook(files): #a file per word\n", 77 | " data, rate = librosa.core.load(\"./selectedToBeWavs/\"+file+\".wav\", sr=SAMPLE_RATE)\n", 78 | " ann = tgt.io.read_textgrid('./alignedWavs/selectedToBeWavs/'+file+\".TextGrid\") #UTF-8\n", 79 | " #Tiers: words, phones, IPA(UTF-8)\n", 80 | " phoneTier = ann.get_tier_by_name(\"phones\") #format: (start,end,text)\n", 81 | "\n", 82 | " for phone in phoneTier: #for every phone\n", 83 | " timeSeries = data[int(phone.start_time*rate):int(phone.end_time*rate)]\n", 84 | " if timeSeries.size <= MAX_NFRAME:\n", 85 | " timeSeries=np.concatenate((timeSeries,np.zeros(MAX_NFRAME-timeSeries.size)))\n", 86 | " #timeSeriesList.append(timeSeries)\n", 87 | " mfccs.append(librosa.feature.mfcc(timeSeries, sr = SAMPLE_RATE, n_mfcc=13))\n", 88 | " #mfccs.append(mfcc(timeSeries,SAMPLE_RATE,nfilt=13,nfft=2048,winfunc=np.hamming))\n", 89 | " n_frames.append(timeSeries.size)\n", 90 | " phones.append(phone.text)\n", 91 | " \n", 92 | "#Some sample of the timeseries are listened to check consistency. 
Checks out.Very hard to listen to the phones though" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 4, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "df = pd.DataFrame({'phones' : phones, 'n_frames': n_frames, 'MFCCs': mfccs})" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 5, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "df.phones = [''.join([alpha for alpha in phone if alpha.isalpha()]) for phone in df.phones]" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 6, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "(224722, 3)" 122 | ] 123 | }, 124 | "execution_count": 6, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "df.shape" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 7, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/html": [ 141 | "
\n", 142 | "\n", 155 | "\n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | "
phonesn_framesMFCCs
0AA25578[[-212.6292397588255, -180.4339980080033, -166...
1sp25578[[-588.6476307487844, -601.2085347365278, -621...
2R25578[[-233.00378201793862, -234.23804420914087, -2...
3sp25578[[-687.5199944609631, -679.0043939829035, -693...
4EY25578[[-179.22319403266283, -143.06206693968687, -1...
\n", 197 | "
" 198 | ], 199 | "text/plain": [ 200 | " phones n_frames MFCCs\n", 201 | "0 AA 25578 [[-212.6292397588255, -180.4339980080033, -166...\n", 202 | "1 sp 25578 [[-588.6476307487844, -601.2085347365278, -621...\n", 203 | "2 R 25578 [[-233.00378201793862, -234.23804420914087, -2...\n", 204 | "3 sp 25578 [[-687.5199944609631, -679.0043939829035, -693...\n", 205 | "4 EY 25578 [[-179.22319403266283, -143.06206693968687, -1..." 206 | ] 207 | }, 208 | "execution_count": 7, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "df.head()" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 8, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "41" 226 | ] 227 | }, 228 | "execution_count": 8, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "df.phones.nunique()" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 9, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/plain": [ 245 | "224722" 246 | ] 247 | }, 248 | "execution_count": 9, 249 | "metadata": {}, 250 | "output_type": "execute_result" 251 | } 252 | ], 253 | "source": [ 254 | "sum(df.n_frames == 25578)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 10, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "text/plain": [ 265 | "True" 266 | ] 267 | }, 268 | "execution_count": 10, 269 | "metadata": {}, 270 | "output_type": "execute_result" 271 | } 272 | ], 273 | "source": [ 274 | "np.all(df.MFCCs.apply(np.shape) == (13,50))" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 11, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "for phone in df.phones.unique():\n", 284 | " df[df.phones==phone].drop([\"phones\",\"n_frames\"],axis=1).to_pickle(\"./Cambridge_bigger USING LIBROSA/phoneDfs/\"+phone+\"_df.pickle\")" 285 | ] 
286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 12, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "np.save(\"./Cambridge_bigger USING LIBROSA/phoneList.npy\",np.array(df.phones.unique()))" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "#### Delta Features:" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 4, 306 | "metadata": { 307 | "scrolled": false 308 | }, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "application/vnd.jupyter.widget-view+json": { 313 | "model_id": "2eade8cbb04f48318bd2c65d5267e8e6", 314 | "version_major": 2, 315 | "version_minor": 0 316 | }, 317 | "text/plain": [ 318 | "HBox(children=(IntProgress(value=0, max=41), HTML(value='')))" 319 | ] 320 | }, 321 | "metadata": {}, 322 | "output_type": "display_data" 323 | }, 324 | { 325 | "name": "stdout", 326 | "output_type": "stream", 327 | "text": [ 328 | "(3889, 1)\n", 329 | "(20712, 1)\n", 330 | "(10364, 1)\n", 331 | "(3786, 1)\n", 332 | "(4367, 1)\n", 333 | "(8690, 1)\n", 334 | "(21038, 1)\n", 335 | "(4927, 1)\n", 336 | "(9416, 1)\n", 337 | "(12562, 1)\n", 338 | "(3862, 1)\n", 339 | "(3568, 1)\n", 340 | "(13505, 1)\n", 341 | "(12399, 1)\n", 342 | "(2691, 1)\n", 343 | "(14294, 1)\n", 344 | "(7424, 1)\n", 345 | "(6341, 1)\n", 346 | "(2828, 1)\n", 347 | "(1839, 1)\n", 348 | "(5788, 1)\n", 349 | "(2738, 1)\n", 350 | "(13094, 1)\n", 351 | "(6155, 1)\n", 352 | "(2661, 1)\n", 353 | "(1478, 1)\n", 354 | "(2400, 1)\n", 355 | "(2154, 1)\n", 356 | "(1649, 1)\n", 357 | "(2180, 1)\n", 358 | "(2213, 1)\n", 359 | "(2486, 1)\n", 360 | "(739, 1)\n", 361 | "(190, 1)\n", 362 | "(6113, 1)\n", 363 | "(726, 1)\n", 364 | "(1289, 1)\n", 365 | "(1111, 1)\n", 366 | "(556, 1)\n", 367 | "(295, 1)\n", 368 | "(205, 1)\n", 369 | "\n" 370 | ] 371 | } 372 | ], 373 | "source": [ 374 | "phoneList = np.load(\"./Cambridge_bigger USING LIBROSA/phoneList.npy\",allow_pickle=True)\n", 375 | "for phone in 
tqdm_notebook(phoneList):\n", 376 | " df = pd.read_pickle(\"./Cambridge_bigger USING LIBROSA/phoneDfs/\"+phone+\"_df.pickle\")\n", 377 | " print(df.shape)\n", 378 | " df['deltaC'] = df.MFCCs.apply(librosa.feature.delta)\n", 379 | " df['doubleDeltaC'] = df.MFCCs.apply(librosa.feature.delta, order=2)\n", 380 | " X = np.array((df.MFCCs,df.deltaC,df.doubleDeltaC))\n", 381 | " X = np.transpose(X,[1,2,3,0])\n", 382 | " np.save('./Cambridge_bigger USING LIBROSA/CAMBRIDGE_BIGGER/'+phone+'_X.npy',X) #Feature vector batch" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 5, 388 | "metadata": { 389 | "scrolled": true 390 | }, 391 | "outputs": [ 392 | { 393 | "data": { 394 | "text/plain": [ 395 | "224722" 396 | ] 397 | }, 398 | "execution_count": 5, 399 | "metadata": {}, 400 | "output_type": "execute_result" 401 | } 402 | ], 403 | "source": [ 404 | "3889+20712+10364+3786+4367+8690+21038+4927+9416+12562+3862+3568+13505+12399+2691+14294+7424+6341+2828+1839+5788+2738+13094+6155+2661+1478+2400+2154+1649+2180+2213+2486+739+190+6113+726+1289+1111+556+295+205" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 6, 410 | "metadata": {}, 411 | "outputs": [ 412 | { 413 | "data": { 414 | "text/plain": [ 415 | "(205, 13, 50, 3)" 416 | ] 417 | }, 418 | "execution_count": 6, 419 | "metadata": {}, 420 | "output_type": "execute_result" 421 | } 422 | ], 423 | "source": [ 424 | "X.shape" 425 | ] 426 | }, 427 | { 428 | "cell_type": "markdown", 429 | "metadata": {}, 430 | "source": [ 431 | "_______" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "---------" 439 | ] 440 | } 441 | ], 442 | "metadata": { 443 | "kernelspec": { 444 | "display_name": "Python 3", 445 | "language": "python", 446 | "name": "python3" 447 | }, 448 | "language_info": { 449 | "codemirror_mode": { 450 | "name": "ipython", 451 | "version": 3 452 | }, 453 | "file_extension": ".py", 454 | "mimetype": "text/x-python", 455 | 
"name": "python", 456 | "nbconvert_exporter": "python", 457 | "pygments_lexer": "ipython3", 458 | "version": "3.7.3" 459 | } 460 | }, 461 | "nbformat": 4, 462 | "nbformat_minor": 2 463 | } 464 | -------------------------------------------------------------------------------- /Phone Folders for L2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 9, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import librosa" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 5, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | " Volume in drive D is DATA\n", 24 | " Volume Serial Number is F490-BACE\n", 25 | "\n", 26 | " Directory of D:\\Thesis Work\\InitialDemoWork\\Cambridge_bigger USING LIBROSA\n", 27 | "\n", 28 | "05/26/2019 12:42 PM .\n", 29 | "05/26/2019 12:42 PM ..\n", 30 | "05/26/2019 12:42 PM .ipynb_checkpoints\n", 31 | "05/15/2019 03:44 PM CAMBRIDGE_BIGGER\n", 32 | "05/15/2019 03:51 PM Cambridge_bigger Phones Dev\n", 33 | "05/15/2019 03:51 PM Cambridge_bigger Phones Test\n", 34 | "05/15/2019 03:51 PM Cambridge_bigger Phones Train\n", 35 | "05/25/2019 04:13 PM Cambridge_bigger Train AbsMax\n", 36 | "05/15/2019 03:57 PM Cambridge_bigger Train MinMax\n", 37 | "05/25/2019 03:52 PM 432,658 Final Notebook.html\n", 38 | "01/22/2019 10:24 PM 183,173,931 L2monoPhoneDfTest.pickle\n", 39 | "01/22/2019 10:56 PM 391,178,932 L2monoPhoneDfTrain.pickle\n", 40 | "05/18/2019 10:56 PM lessModels\n", 41 | "05/25/2019 04:48 PM 81,167 Model Training and Testing-MAX NORM.ipynb\n", 42 | "05/25/2019 04:03 PM 141,176 Model Training and Testing.ipynb\n", 43 | "05/26/2019 12:42 PM 141,462 Model Training-EC and Testing-L2.ipynb\n", 44 | "05/18/2019 05:41 PM Models\n", 45 | "05/15/2019 03:46 PM 11,242 
MonophoneCambridgeBig-LIBROSA.ipynb\n", 46 | "05/26/2019 12:42 PM 72 Phone Folders for L2.ipynb\n", 47 | "05/15/2019 03:28 PM phoneDfs\n", 48 | "05/15/2019 03:29 PM 678 phoneList.npy\n", 49 | "05/25/2019 04:15 PM 290,252 ScrappingCode.html\n", 50 | "05/25/2019 04:24 PM tanhModels\n", 51 | "05/25/2019 04:13 PM 14,001 Train Val Test and Train scalings for Cambridge Big - LIBROSA.ipynb\n", 52 | " 11 File(s) 575,465,571 bytes\n", 53 | " 13 Dir(s) 938,355,027,968 bytes free\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "!dir" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 65, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "test_featureDf = pd.read_pickle(\"./L2monoPhoneDfTest.pickle\")" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 66, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/html": [ 78 | "
\n", 79 | "\n", 92 | "\n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | "
phonesn_frameslabelserrorTypeMFCCs
0G255781c[[-423.77322755406794, -411.9959932464969, -41...
1AA1255781c[[-221.94339159438036, -221.9371806257653, -22...
2D255781c[[-325.1734666200506, -337.9629833826362, -363...
3B255781c[[-447.436674575593, -452.4805757789892, -464....
4L255781c[[-292.9672733282472, -267.3215759439314, -275...
\n", 146 | "
" 147 | ], 148 | "text/plain": [ 149 | " phones n_frames labels errorType \\\n", 150 | "0 G 25578 1 c \n", 151 | "1 AA1 25578 1 c \n", 152 | "2 D 25578 1 c \n", 153 | "3 B 25578 1 c \n", 154 | "4 L 25578 1 c \n", 155 | "\n", 156 | " MFCCs \n", 157 | "0 [[-423.77322755406794, -411.9959932464969, -41... \n", 158 | "1 [[-221.94339159438036, -221.9371806257653, -22... \n", 159 | "2 [[-325.1734666200506, -337.9629833826362, -363... \n", 160 | "3 [[-447.436674575593, -452.4805757789892, -464.... \n", 161 | "4 [[-292.9672733282472, -267.3215759439314, -275... " 162 | ] 163 | }, 164 | "execution_count": 66, 165 | "metadata": {}, 166 | "output_type": "execute_result" 167 | } 168 | ], 169 | "source": [ 170 | "test_featureDf.head()" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 67, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "test_featureDf['deltaC'] = test_featureDf.MFCCs.apply(librosa.feature.delta)\n", 180 | "test_featureDf['doubleDeltaC'] = test_featureDf.MFCCs.apply(librosa.feature.delta, order=2)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 68, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "name": "stdout", 190 | "output_type": "stream", 191 | "text": [ 192 | "Test Phones: 34799\n", 193 | "Unique Test Phones: 71\n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "print(\"Test Phones: \", len(test_featureDf.phones))\n", 199 | "print(\"Unique Test Phones: \", len(set(test_featureDf.phones)))" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 69, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "#remove stresses\n", 209 | "test_featureDf.phones = [''.join([alpha for alpha in phone if alpha.isalpha()]) for phone in test_featureDf.phones]" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 70, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "name": "stdout", 219 | "output_type": "stream", 220 | "text": 
[ 221 | "Unique Test Phones: 42\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "print(\"Unique Test Phones: \", len(set(test_featureDf.phones)))" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 71, 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/html": [ 237 | "
\n", 238 | "\n", 251 | "\n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | "
phonesn_frameslabelserrorTypeMFCCsdeltaCdoubleDeltaC
0G255781c[[-423.77322755406794, -411.9959932464969, -41...[[22.300710618871317, 22.300710618871317, 22.3...[[9.696126870518768, 9.696126870518768, 9.6961...
1AA255781c[[-221.94339159438036, -221.9371806257653, -22...[[-11.524404444499638, -11.524404444499638, -1...[[-9.994265686136963, -9.994265686136963, -9.9...
2D255781c[[-325.1734666200506, -337.9629833826362, -363...[[-15.686472752824004, -15.686472752824004, -1...[[1.1662679563217837, 1.1662679563217837, 1.16...
3B255781c[[-447.436674575593, -452.4805757789892, -464....[[-34.727038663343485, -34.727038663343485, -3...[[-34.82559064172155, -34.82559064172155, -34....
4L255781c[[-292.9672733282472, -267.3215759439314, -275...[[-3.6625277966476655, -3.6625277966476655, -3...[[7.597755619620676, 7.597755619620676, 7.5977...
\n", 317 | "
" 318 | ], 319 | "text/plain": [ 320 | " phones n_frames labels errorType \\\n", 321 | "0 G 25578 1 c \n", 322 | "1 AA 25578 1 c \n", 323 | "2 D 25578 1 c \n", 324 | "3 B 25578 1 c \n", 325 | "4 L 25578 1 c \n", 326 | "\n", 327 | " MFCCs \\\n", 328 | "0 [[-423.77322755406794, -411.9959932464969, -41... \n", 329 | "1 [[-221.94339159438036, -221.9371806257653, -22... \n", 330 | "2 [[-325.1734666200506, -337.9629833826362, -363... \n", 331 | "3 [[-447.436674575593, -452.4805757789892, -464.... \n", 332 | "4 [[-292.9672733282472, -267.3215759439314, -275... \n", 333 | "\n", 334 | " deltaC \\\n", 335 | "0 [[22.300710618871317, 22.300710618871317, 22.3... \n", 336 | "1 [[-11.524404444499638, -11.524404444499638, -1... \n", 337 | "2 [[-15.686472752824004, -15.686472752824004, -1... \n", 338 | "3 [[-34.727038663343485, -34.727038663343485, -3... \n", 339 | "4 [[-3.6625277966476655, -3.6625277966476655, -3... \n", 340 | "\n", 341 | " doubleDeltaC \n", 342 | "0 [[9.696126870518768, 9.696126870518768, 9.6961... \n", 343 | "1 [[-9.994265686136963, -9.994265686136963, -9.9... \n", 344 | "2 [[1.1662679563217837, 1.1662679563217837, 1.16... \n", 345 | "3 [[-34.82559064172155, -34.82559064172155, -34.... \n", 346 | "4 [[7.597755619620676, 7.597755619620676, 7.5977... 
" 347 | ] 348 | }, 349 | "execution_count": 71, 350 | "metadata": {}, 351 | "output_type": "execute_result" 352 | } 353 | ], 354 | "source": [ 355 | "test_featureDf.head()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 72, 361 | "metadata": {}, 362 | "outputs": [ 363 | { 364 | "data": { 365 | "text/plain": [ 366 | "c 30761\n", 367 | "s 2290\n", 368 | " s 899\n", 369 | " d 277\n", 370 | "d 194\n", 371 | "a 186\n", 372 | " a 111\n", 373 | "S 79\n", 374 | "D 2\n", 375 | "Name: errorType, dtype: int64" 376 | ] 377 | }, 378 | "execution_count": 72, 379 | "metadata": {}, 380 | "output_type": "execute_result" 381 | } 382 | ], 383 | "source": [ 384 | "test_featureDf[\"errorType\"].value_counts()" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 73, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "#only substitution errors needed\n", 394 | "test_featureDf = test_featureDf[test_featureDf[\"errorType\"].isin(['s',' s','S','c',' S'])].copy()" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 74, 400 | "metadata": {}, 401 | "outputs": [ 402 | { 403 | "data": { 404 | "text/plain": [ 405 | "(34029, 7)" 406 | ] 407 | }, 408 | "execution_count": 74, 409 | "metadata": {}, 410 | "output_type": "execute_result" 411 | } 412 | ], 413 | "source": [ 414 | "test_featureDf.shape" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 75, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "data": { 424 | "text/plain": [ 425 | "1 30761\n", 426 | "0 3268\n", 427 | "Name: labels, dtype: int64" 428 | ] 429 | }, 430 | "execution_count": 75, 431 | "metadata": {}, 432 | "output_type": "execute_result" 433 | } 434 | ], 435 | "source": [ 436 | "test_featureDf.labels.value_counts()" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": 76, 442 | "metadata": {}, 443 | "outputs": [], 444 | "source": [ 445 | 
"test_featureDf.drop(columns=[\"errorType\",\"n_frames\"],inplace=True)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 77, 451 | "metadata": {}, 452 | "outputs": [ 453 | { 454 | "data": { 455 | "text/html": [ 456 | "
\n", 457 | "\n", 470 | "\n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | "
phoneslabelsMFCCsdeltaCdoubleDeltaC
0G1[[-423.77322755406794, -411.9959932464969, -41...[[22.300710618871317, 22.300710618871317, 22.3...[[9.696126870518768, 9.696126870518768, 9.6961...
1AA1[[-221.94339159438036, -221.9371806257653, -22...[[-11.524404444499638, -11.524404444499638, -1...[[-9.994265686136963, -9.994265686136963, -9.9...
2D1[[-325.1734666200506, -337.9629833826362, -363...[[-15.686472752824004, -15.686472752824004, -1...[[1.1662679563217837, 1.1662679563217837, 1.16...
3B1[[-447.436674575593, -452.4805757789892, -464....[[-34.727038663343485, -34.727038663343485, -3...[[-34.82559064172155, -34.82559064172155, -34....
4L1[[-292.9672733282472, -267.3215759439314, -275...[[-3.6625277966476655, -3.6625277966476655, -3...[[7.597755619620676, 7.597755619620676, 7.5977...
\n", 524 | "
" 525 | ], 526 | "text/plain": [ 527 | " phones labels MFCCs \\\n", 528 | "0 G 1 [[-423.77322755406794, -411.9959932464969, -41... \n", 529 | "1 AA 1 [[-221.94339159438036, -221.9371806257653, -22... \n", 530 | "2 D 1 [[-325.1734666200506, -337.9629833826362, -363... \n", 531 | "3 B 1 [[-447.436674575593, -452.4805757789892, -464.... \n", 532 | "4 L 1 [[-292.9672733282472, -267.3215759439314, -275... \n", 533 | "\n", 534 | " deltaC \\\n", 535 | "0 [[22.300710618871317, 22.300710618871317, 22.3... \n", 536 | "1 [[-11.524404444499638, -11.524404444499638, -1... \n", 537 | "2 [[-15.686472752824004, -15.686472752824004, -1... \n", 538 | "3 [[-34.727038663343485, -34.727038663343485, -3... \n", 539 | "4 [[-3.6625277966476655, -3.6625277966476655, -3... \n", 540 | "\n", 541 | " doubleDeltaC \n", 542 | "0 [[9.696126870518768, 9.696126870518768, 9.6961... \n", 543 | "1 [[-9.994265686136963, -9.994265686136963, -9.9... \n", 544 | "2 [[1.1662679563217837, 1.1662679563217837, 1.16... \n", 545 | "3 [[-34.82559064172155, -34.82559064172155, -34.... \n", 546 | "4 [[7.597755619620676, 7.597755619620676, 7.5977... 
" 547 | ] 548 | }, 549 | "execution_count": 77, 550 | "metadata": {}, 551 | "output_type": "execute_result" 552 | } 553 | ], 554 | "source": [ 555 | "test_featureDf.head()" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": 78, 561 | "metadata": {}, 562 | "outputs": [ 563 | { 564 | "data": { 565 | "text/plain": [ 566 | "(42,)" 567 | ] 568 | }, 569 | "execution_count": 78, 570 | "metadata": {}, 571 | "output_type": "execute_result" 572 | } 573 | ], 574 | "source": [ 575 | "test_featureDf.phones.unique().shape" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 81, 581 | "metadata": { 582 | "scrolled": false 583 | }, 584 | "outputs": [ 585 | { 586 | "name": "stdout", 587 | "output_type": "stream", 588 | "text": [ 589 | "Shape of X_test: (240, 13, 50, 3)\n", 590 | "Shape of y_test: (240,)\n", 591 | "Shape of X_test: (450, 13, 50, 3)\n", 592 | "Shape of y_test: (450,)\n", 593 | "Shape of X_test: (1284, 13, 50, 3)\n", 594 | "Shape of y_test: (1284,)\n", 595 | "Shape of X_test: (418, 13, 50, 3)\n", 596 | "Shape of y_test: (418,)\n", 597 | "Shape of X_test: (981, 13, 50, 3)\n", 598 | "Shape of y_test: (981,)\n", 599 | "Shape of X_test: (689, 13, 50, 3)\n", 600 | "Shape of y_test: (689,)\n", 601 | "Shape of X_test: (1304, 13, 50, 3)\n", 602 | "Shape of y_test: (1304,)\n", 603 | "Shape of X_test: (1763, 13, 50, 3)\n", 604 | "Shape of y_test: (1763,)\n", 605 | "Shape of X_test: (2904, 13, 50, 3)\n", 606 | "Shape of y_test: (2904,)\n", 607 | "Shape of X_test: (971, 13, 50, 3)\n", 608 | "Shape of y_test: (971,)\n", 609 | "Shape of X_test: (683, 13, 50, 3)\n", 610 | "Shape of y_test: (683,)\n", 611 | "Shape of X_test: (819, 13, 50, 3)\n", 612 | "Shape of y_test: (819,)\n", 613 | "Shape of X_test: (638, 13, 50, 3)\n", 614 | "Shape of y_test: (638,)\n", 615 | "Shape of X_test: (554, 13, 50, 3)\n", 616 | "Shape of y_test: (554,)\n", 617 | "Shape of X_test: (504, 13, 50, 3)\n", 618 | "Shape of y_test: (504,)\n", 619 | "Shape of 
X_test: (1946, 13, 50, 3)\n", 620 | "Shape of y_test: (1946,)\n", 621 | "Shape of X_test: (1250, 13, 50, 3)\n", 622 | "Shape of y_test: (1250,)\n", 623 | "Shape of X_test: (2092, 13, 50, 3)\n", 624 | "Shape of y_test: (2092,)\n", 625 | "Shape of X_test: (418, 13, 50, 3)\n", 626 | "Shape of y_test: (418,)\n", 627 | "Shape of X_test: (963, 13, 50, 3)\n", 628 | "Shape of y_test: (963,)\n", 629 | "Shape of X_test: (813, 13, 50, 3)\n", 630 | "Shape of y_test: (813,)\n", 631 | "Shape of X_test: (742, 13, 50, 3)\n", 632 | "Shape of y_test: (742,)\n", 633 | "Shape of X_test: (615, 13, 50, 3)\n", 634 | "Shape of y_test: (615,)\n", 635 | "Shape of X_test: (1236, 13, 50, 3)\n", 636 | "Shape of y_test: (1236,)\n", 637 | "Shape of X_test: (1591, 13, 50, 3)\n", 638 | "Shape of y_test: (1591,)\n", 639 | "Shape of X_test: (889, 13, 50, 3)\n", 640 | "Shape of y_test: (889,)\n", 641 | "Shape of X_test: (194, 13, 50, 3)\n", 642 | "Shape of y_test: (194,)\n", 643 | "Shape of X_test: (1046, 13, 50, 3)\n", 644 | "Shape of y_test: (1046,)\n", 645 | "Shape of X_test: (398, 13, 50, 3)\n", 646 | "Shape of y_test: (398,)\n", 647 | "Shape of X_test: (1340, 13, 50, 3)\n", 648 | "Shape of y_test: (1340,)\n", 649 | "Shape of X_test: (810, 13, 50, 3)\n", 650 | "Shape of y_test: (810,)\n", 651 | "Shape of X_test: (920, 13, 50, 3)\n", 652 | "Shape of y_test: (920,)\n", 653 | "Shape of X_test: (386, 13, 50, 3)\n", 654 | "Shape of y_test: (386,)\n", 655 | "Shape of X_test: (610, 13, 50, 3)\n", 656 | "Shape of y_test: (610,)\n", 657 | "Shape of X_test: (160, 13, 50, 3)\n", 658 | "Shape of y_test: (160,)\n", 659 | "Shape of X_test: (240, 13, 50, 3)\n", 660 | "Shape of y_test: (240,)\n", 661 | "Shape of X_test: (159, 13, 50, 3)\n", 662 | "Shape of y_test: (159,)\n", 663 | "Shape of X_test: (701, 13, 50, 3)\n", 664 | "Shape of y_test: (701,)\n", 665 | "Shape of X_test: (244, 13, 50, 3)\n", 666 | "Shape of y_test: (244,)\n", 667 | "Shape of X_test: (60, 13, 50, 3)\n", 668 | "Shape of y_test: (60,)\n", 669 
| "Shape of X_test: (3, 13, 50, 3)\n", 670 | "Shape of y_test: (3,)\n" 671 | ] 672 | } 673 | ], 674 | "source": [ 675 | "for phone in test_featureDf.phones.unique():\n", 676 | " if phone != \"AX\":\n", 677 | " p_featureDf = test_featureDf[test_featureDf[\"phones\"] == phone]\n", 678 | " X_test = np.array((p_featureDf.MFCCs,p_featureDf.deltaC,p_featureDf.doubleDeltaC))\n", 679 | " X_test = np.transpose(X_test,[1,2,3,0])\n", 680 | " y_test = p_featureDf.labels.values\n", 681 | " print(\"Shape of X_test: \", X_test.shape)\n", 682 | " print(\"Shape of y_test: \", y_test.shape)\n", 683 | " np.save(\"./L2 Phones Dev/\"+phone+\"_X.npy\",X_test)\n", 684 | " np.save(\"./L2 Phones Dev/\"+phone+\"_y.npy\",y_test)" 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": 85, 690 | "metadata": {}, 691 | "outputs": [ 692 | { 693 | "data": { 694 | "text/plain": [ 695 | "False" 696 | ] 697 | }, 698 | "execution_count": 85, 699 | "metadata": {}, 700 | "output_type": "execute_result" 701 | } 702 | ], 703 | "source": [ 704 | "\"ZH\" in test_featureDf.phones.unique()" 705 | ] 706 | } 707 | ], 708 | "metadata": { 709 | "kernelspec": { 710 | "display_name": "Python 3", 711 | "language": "python", 712 | "name": "python3" 713 | }, 714 | "language_info": { 715 | "codemirror_mode": { 716 | "name": "ipython", 717 | "version": 3 718 | }, 719 | "file_extension": ".py", 720 | "mimetype": "text/x-python", 721 | "name": "python", 722 | "nbconvert_exporter": "python", 723 | "pygments_lexer": "ipython3", 724 | "version": "3.7.3" 725 | } 726 | }, 727 | "nbformat": 4, 728 | "nbformat_minor": 2 729 | } 730 | -------------------------------------------------------------------------------- /PhoneLevel_AutoEncoder_MFCC_TrainCamb_TestCamb.ipynb: -------------------------------------------------------------------------------- 1 | 
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"PhoneLevel_AutoEncoder_MFCC_TrainCamb_TestCamb.ipynb","version":"0.3.2","provenance":[{"file_id":"1wFBnH134wmMe7tJVJoPYNrj-IN6AvbuK","timestamp":1550071433337},{"file_id":"1tU9msAU_Mq8R6GCBRgoZAph_2_gXe3Vu","timestamp":1548609256683},{"file_id":"1P90PoWrDm5O2YkZmI6GKpf7xa63cf7qD","timestamp":1548422095551}],"collapsed_sections":["aXoZguY5b02w","PlZoSV-eb03b"]},"kernelspec":{"name":"python3","display_name":"Python 3"},"accelerator":"GPU"},"cells":[{"cell_type":"code","metadata":{"colab_type":"code","outputId":"ac4c6372-9ea5-494b-ab2a-554df42e9064","executionInfo":{"status":"ok","timestamp":1550404890029,"user_tz":-300,"elapsed":30587,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"id":"d1eEVQxriiEp","colab":{"base_uri":"https://localhost:8080/","height":124}},"source":["from google.colab import drive\n","drive.mount('./drive')"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n","\n","Enter your authorization code:\n","··········\n","Mounted at ./drive\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"g2Osav3708xy","colab_type":"text"},"source":["Bism \n","# Imports"]},{"cell_type":"code","metadata":{"id":"Nz23OfFgoMDo","colab_type":"code","colab":{}},"source":["import pandas as pd\n","import numpy as np\n","import os\n","import IPython.display as ipd\n","import librosa\n","import 
matplotlib\n","import matplotlib.pyplot as plt\n","import seaborn as sns\n","%matplotlib inline\n","matplotlib.style.use('ggplot')\n","from tqdm import tqdm_notebook\n","\n","\n","#SKLEARN\n","from sklearn.preprocessing import LabelBinarizer\n","from sklearn.preprocessing import LabelEncoder\n","import sklearn.metrics as sklm\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import confusion_matrix\n","from sklearn.manifold import TSNE\n","\n","#KERAS\n","from keras import models\n","from keras.models import Model, load_model\n","from keras import callbacks\n","import keras\n","from keras.models import Sequential\n","from keras.layers import Input, Dense, Dropout, Flatten, BatchNormalization\n","from keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Conv2DTranspose, AveragePooling2D\n","from keras.layers.advanced_activations import LeakyReLU\n","from keras.optimizers import SGD, Adam"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"colab_type":"text","id":"-1MAYCR6b02u"},"source":["# Implementation 10 (PHONE LEVEL)\n","## Using Cambridge as Training set\n","## L2 as val+test set\n","## AUTOENCODER PER PHONE"]},{"cell_type":"code","metadata":{"id":"VmsOgJh7VZRU","colab_type":"code","outputId":"e42e9f0c-231f-4ba0-94c1-86f9fc100444","executionInfo":{"status":"ok","timestamp":1550404974994,"user_tz":-300,"elapsed":1620,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["phones = [file[:-4] for file in os.listdir('./drive/My Drive/Thesis Work/Cambridge Phones/')]\n","len(phones)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["42"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"XB4rSPscVnJ4","colab_type":"code","colab":{}},"source":["phone 
= \"L\""],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"colab_type":"text","id":"SdjP405Hb02v"},"source":["### Training"]},{"cell_type":"markdown","metadata":{"colab_type":"text","id":"aXoZguY5b02w"},"source":["#### X_TRAIN, X_TEST & Y_TEST"]},{"cell_type":"code","metadata":{"colab_type":"code","id":"jDk4o_z3b03O","outputId":"ff5749cf-5aad-4aa6-8ad9-34eb1d343722","executionInfo":{"status":"ok","timestamp":1550415590652,"user_tz":-300,"elapsed":1549,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["X = np.load('./drive/My Drive/Thesis Work/Cambridge Phones/'+phone+'.npy')\n","np.random.shuffle(X)\n","X.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(1093, 13, 50, 3)"]},"metadata":{"tags":[]},"execution_count":162}]},{"cell_type":"code","metadata":{"id":"jv1v_SzxV2iF","colab_type":"code","outputId":"ef386592-6f66-4cdf-bedf-709f32b518cb","executionInfo":{"status":"ok","timestamp":1550415593579,"user_tz":-300,"elapsed":1241,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["X_train = X[:int(np.ceil(X.shape[0]*0.9))]\n","X_test = X[int(np.ceil(X.shape[0]*0.9)):]\n","y_test = 
np.ones(X_test.shape[0])\n","X_train.shape[0]+X_test.shape[0]"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["1093"]},"metadata":{"tags":[]},"execution_count":163}]},{"cell_type":"code","metadata":{"id":"hWFWVBxKV5zC","colab_type":"code","outputId":"264a954d-77b5-4cab-b1e9-ffac9573802e","executionInfo":{"status":"ok","timestamp":1550415594192,"user_tz":-300,"elapsed":1476,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["y_test.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(109,)"]},"metadata":{"tags":[]},"execution_count":164}]},{"cell_type":"code","metadata":{"id":"tLWLc2D6V7WL","colab_type":"code","colab":{}},"source":["for p in phones:\n"," if p != phone:\n"," notPhone = np.load('./drive/My Drive/Thesis Work/Cambridge Phones/'+p+'.npy')\n"," np.random.shuffle(notPhone)\n"," X_test = np.vstack((X_test,notPhone[:int(np.ceil(y_test.shape[0]/41))]))"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"zhKn3UqfV93U","colab_type":"code","outputId":"9e83e699-c54d-46ed-ccf6-a3040778f894","executionInfo":{"status":"ok","timestamp":1550415599202,"user_tz":-300,"elapsed":5260,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["y_test = 
np.hstack((y_test,np.zeros(int(np.ceil(y_test.shape[0]/41)*41))))\n","y_test.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(232,)"]},"metadata":{"tags":[]},"execution_count":166}]},{"cell_type":"code","metadata":{"id":"7qTrr5xrV_8R","colab_type":"code","outputId":"84cc9e08-e046-4f78-d17b-a49bc227df96","executionInfo":{"status":"ok","timestamp":1550415599203,"user_tz":-300,"elapsed":4678,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["X_test.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(232, 13, 50, 3)"]},"metadata":{"tags":[]},"execution_count":167}]},{"cell_type":"code","metadata":{"id":"zS0djzmoWBUF","colab_type":"code","outputId":"27f8343a-ad0e-41b7-80db-e6ec1ce8c460","executionInfo":{"status":"ok","timestamp":1550415599205,"user_tz":-300,"elapsed":4516,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["X_train.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(984, 13, 50, 3)"]},"metadata":{"tags":[]},"execution_count":168}]},{"cell_type":"markdown","metadata":{"colab_type":"text","id":"9uVHuupBb03r"},"source":["#### MODEL"]},{"cell_type":"code","metadata":{"id":"RcnBZK3XcqgT","colab_type":"code","colab":{}},"source":["#commented to avoid accidental runs\n","'''for phone in set(train_phones):\n"," np.save(\"./drive/My Drive/Thesis Work/Implementation6/phone_means/mean_\"+phone+\".npy\",\\\n"," np.max(np.abs(x_trainALL[train_phones == 
phone]),(0,1,2)))''';"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"nunEjrDyhRZa","colab_type":"code","outputId":"ab8bdc40-922a-43b3-82c6-9376eb0d5ff3","executionInfo":{"status":"ok","timestamp":1550415599212,"user_tz":-300,"elapsed":3085,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":52}},"source":["train_max = np.load('./drive/My Drive/Thesis Work/L2 Phones/train_phones_means/mean_'+phone+\".npy\") #???\n","X_train = X_train / train_max\n","print(phone)\n","print(X_train.shape)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["L\n","(984, 13, 50, 3)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"g8NCYflaOwmU","colab_type":"code","outputId":"e4029652-54ff-42da-9ce3-fd72e7696a70","executionInfo":{"status":"ok","timestamp":1550415599214,"user_tz":-300,"elapsed":2815,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":52}},"source":["print(np.max(X_train))\n","print(np.min(X_train))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["0.7820627989513965\n","-1.240481751809753\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"7u9ovJiBQumW","colab_type":"code","outputId":"e94b67e9-1c21-4a70-b9cb-30c09a911bfb","executionInfo":{"status":"ok","timestamp":1550415599215,"user_tz":-300,"elapsed":2543,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["np.nan in 
X_train"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["False"]},"metadata":{"tags":[]},"execution_count":172}]},{"cell_type":"code","metadata":{"id":"KCj2Lc4s852C","colab_type":"code","colab":{}},"source":["'''noise_factor = 0.5\n","x_train_noisy = x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape) \n","x_train_noisy = np.clip(x_train_noisy, -1, 1.)''';"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"TpGhM5-pYImQ","colab_type":"code","colab":{}},"source":["input_img = Input(shape=(13, 50, 3)) # adapt this if using `channels_first` image data format\n","\n","x = Conv2D(64, (2, 3), activation='tanh', padding='valid')(input_img)\n","x = MaxPooling2D((2, 2), padding='same')(x)\n","x = BatchNormalization()(x)\n","x = Conv2D(128, (3, 3), activation='tanh', padding='same')(x)\n","encoded = MaxPooling2D((2, 2), padding='same')(x)\n","\n","# at this point the representation is (3, 12, 128)\n","\n","x = BatchNormalization()(encoded)\n","x = Conv2D(128, (3, 3), activation='tanh', padding='same')(x)\n","x = UpSampling2D((2, 2))(x)\n","x = BatchNormalization()(x)\n","x = Conv2D(64, (3, 3), activation='tanh', padding='same')(x)\n","x = UpSampling2D((2, 2))(x)\n","x = BatchNormalization()(x)\n","decoded = Conv2DTranspose(3, (2, 3), activation='tanh', padding='valid')(x)\n","\n","autoencoder = Model(input_img, decoded)\n","\n","autoencoder.compile(optimizer= keras.optimizers.Adam(), loss='mean_squared_error');"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"puKPdi-KLQE0","colab_type":"code","outputId":"a2e06851-dc8e-47bd-a720-fd7af100be3c","executionInfo":{"status":"ok","timestamp":1550415600859,"user_tz":-300,"elapsed":1237,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":624}},"source":["autoencoder.summary()"],"execution_count":0,"outputs":[{"output_type":"stream","text":["_________________________________________________________________\n","Layer (type) Output Shape Param # \n","=================================================================\n","input_4 (InputLayer) (None, 13, 50, 3) 0 \n","_________________________________________________________________\n","conv2d_13 (Conv2D) (None, 12, 48, 64) 1216 \n","_________________________________________________________________\n","max_pooling2d_7 (MaxPooling2 (None, 6, 24, 64) 0 \n","_________________________________________________________________\n","batch_normalization_13 (Batc (None, 6, 24, 64) 256 \n","_________________________________________________________________\n","conv2d_14 (Conv2D) (None, 6, 24, 128) 73856 \n","_________________________________________________________________\n","max_pooling2d_8 (MaxPooling2 (None, 3, 12, 128) 0 \n","_________________________________________________________________\n","batch_normalization_14 (Batc (None, 3, 12, 128) 512 \n","_________________________________________________________________\n","conv2d_15 (Conv2D) (None, 3, 12, 128) 147584 \n","_________________________________________________________________\n","up_sampling2d_7 (UpSampling2 (None, 6, 24, 128) 0 \n","_________________________________________________________________\n","batch_normalization_15 (Batc (None, 6, 24, 128) 512 \n","_________________________________________________________________\n","conv2d_16 (Conv2D) (None, 6, 24, 64) 73792 \n","_________________________________________________________________\n","up_sampling2d_8 (UpSampling2 (None, 12, 48, 64) 0 \n","_________________________________________________________________\n","batch_normalization_16 (Batc (None, 
12, 48, 64) 256 \n","_________________________________________________________________\n","conv2d_transpose_4 (Conv2DTr (None, 13, 50, 3) 1155 \n","=================================================================\n","Total params: 299,139\n","Trainable params: 298,371\n","Non-trainable params: 768\n","_________________________________________________________________\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"13yviC13LEPj","colab_type":"code","outputId":"8e6d3dd2-5955-4041-bb2a-bc8316077c7a","executionInfo":{"status":"ok","timestamp":1550415795473,"user_tz":-300,"elapsed":5857,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":243}},"source":["callbacklist=[callbacks.History(),callbacks.ModelCheckpoint('./jawadmodel.h5', monitor='loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=5)]\n","autoencoder.fit(X_train, X_train, batch_size=32, epochs=5, callbacks=callbacklist)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Epoch 1/5\n","984/984 [==============================] - 1s 954us/step - loss: 3.4366e-04\n","Epoch 2/5\n","984/984 [==============================] - 1s 949us/step - loss: 3.1935e-04\n","Epoch 3/5\n","984/984 [==============================] - 1s 952us/step - loss: 3.0562e-04\n","Epoch 4/5\n","984/984 [==============================] - 1s 942us/step - loss: 3.0260e-04\n","Epoch 5/5\n","984/984 [==============================] - 1s 955us/step - loss: 3.3670e-04\n","\n","Epoch 00005: loss improved from inf to 0.00034, saving model to 
./jawadmodel.h5\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":177}]},{"cell_type":"code","metadata":{"id":"-iAV6rfs4Dq-","colab_type":"code","outputId":"52e89b6b-013e-4aa7-8b71-4fb02dd2bccf","executionInfo":{"status":"ok","timestamp":1550415862095,"user_tz":-300,"elapsed":1071,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["modelLoss = _.history['loss'][-1]\n","modelLoss"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.0003367000246681727"]},"metadata":{"tags":[]},"execution_count":178}]},{"cell_type":"code","metadata":{"id":"Y__-HEGx5dzC","colab_type":"code","outputId":"1156cf4c-566f-4b66-9bc7-1ae584f7bb82","executionInfo":{"status":"ok","timestamp":1550415862604,"user_tz":-300,"elapsed":930,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["phone"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["'L'"]},"metadata":{"tags":[]},"execution_count":179}]},{"cell_type":"code","metadata":{"id":"GjKD1mDybdML","colab_type":"code","colab":{}},"source":["!cp ./jawadmodel.h5 \"./drive/My Drive/Thesis Work/PhoneLevel_AutoEncoder_TrainCamb_TestCamb/\"$phone\".h5\""],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"colab_type":"text","id":"_PSH9Rrwb04P"},"source":["### TESTING"]},{"cell_type":"code","metadata":{"id":"FtMPiBEfY70a","colab_type":"code","colab":{}},"source":["folder = '\"./drive/My Drive/Thesis 
Work/PhoneLevel_AutoEncoder_MFCC_TrainCamb_TestCamb/\"'"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"colab_type":"code","outputId":"80f549f8-fe97-41b0-fb62-eaf67cb44624","executionInfo":{"status":"ok","timestamp":1550415871934,"user_tz":-300,"elapsed":5540,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"id":"0fEs9WhE6x5G","colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["!ls $folder"],"execution_count":0,"outputs":[{"output_type":"stream","text":["AA.h5 B.h5 L.h5 R.h5\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"61R7miB26x5M","outputId":"cfcfb064-e55b-41cd-ee10-37100c041dab","executionInfo":{"status":"ok","timestamp":1550415871935,"user_tz":-300,"elapsed":4842,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":69}},"source":["#MAX normalization\n","train_max = np.load('./drive/My Drive/Thesis Work/L2 Phones/train_phones_means/mean_'+phone+\".npy\")\n","X_test = X_test / train_max\n","print(phone)\n","print(X_test.shape)\n","print(y_test.shape)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["L\n","(232, 13, 50, 3)\n","(232,)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"jZFkKHEz-sgO","colab_type":"code","colab":{}},"source":["'''noise_factor = 0.5\n","x_test_noisy = x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape) \n","x_test_noisy = np.clip(x_test_noisy, -1, 1)''';"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"Y_wyvIyNNgGv","colab_type":"code","colab":{}},"source":["autoencoder = load_model(folder[1:-1]+phone+'.h5') #load previous one without 
noise"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"DcSsvupx7_ks","colab_type":"code","outputId":"a1249e34-5ed0-4513-c999-7b26233a9fa4","executionInfo":{"status":"ok","timestamp":1550415877700,"user_tz":-300,"elapsed":3581,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":52}},"source":["autoencoder.evaluate(X_test[y_test == 1],X_test[y_test == 1],batch_size=1)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["109/109 [==============================] - 1s 8ms/step\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["0.00034479620474510306"]},"metadata":{"tags":[]},"execution_count":186}]},{"cell_type":"code","metadata":{"id":"HcbgxRQb8Jm3","colab_type":"code","outputId":"2e27b281-3497-4571-d94c-95e7792c65c0","executionInfo":{"status":"ok","timestamp":1550415878307,"user_tz":-300,"elapsed":3511,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":52}},"source":["autoencoder.evaluate(X_test[y_test == 0],X_test[y_test == 0],batch_size=1)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["123/123 [==============================] - 0s 3ms/step\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["0.0008833857675081088"]},"metadata":{"tags":[]},"execution_count":187}]},{"cell_type":"code","metadata":{"id":"mI-jGyjJdbOA","colab_type":"code","colab":{}},"source":["#Need to shuffle X_test"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"cNT9hygGeH7o","colab_type":"code","colab":{}},"source":["shuffledIndexes = np.random.permutation(range(X_test.shape[0]))\n","X_test = 
X_test[shuffledIndexes]\n","y_test = y_test[shuffledIndexes]"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"90ZPn9cfa57s","colab_type":"code","outputId":"247c5782-2468-400d-eac9-c6600baae092","executionInfo":{"status":"ok","timestamp":1550415891610,"user_tz":-300,"elapsed":1082,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["X_test.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(232, 13, 50, 3)"]},"metadata":{"tags":[]},"execution_count":190}]},{"cell_type":"code","metadata":{"colab_type":"code","outputId":"dc2eb4e9-7c1b-443c-ded3-a56984bda553","executionInfo":{"status":"ok","timestamp":1550415949130,"user_tz":-300,"elapsed":18374,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"id":"HfysdAru6x5b","colab":{"base_uri":"https://localhost:8080/","height":159}},"source":["validationSetLength = 70 #initial 70 from test as validation set\n","accs = []\n","f1mis = []\n","\n","for threshold in np.arange(0.0001,0.001,0.00001):\n"," y_pred = [1 if autoencoder.evaluate(X_test[i:i+1],X_test[i:i+1],verbose=0)<=threshold\\\n"," else 0 for i in range(validationSetLength)]\n"," \n"," accs.append(sklm.accuracy_score(y_test[:validationSetLength],y_pred))\n"," f1mis.append(sklm.precision_recall_fscore_support(y_test[:validationSetLength],y_pred)[2][0])\n","\n","print(\"Max Achievable Accuracy (Overall): \", max(accs))\n","print(\"Max Achievable Accuracy (Overall) on Threshold: \", np.argmax(accs))\n","\n","print(\"Max Achievable F-1 score Threshold: \", np.argmax(f1mis))\n","print(\"Max Achievable Accuracy On This Threshold: \", accs[np.argmax(f1mis)])\n","print(\"Max Achievable 
F-1 score for Mispronunciations: \", max(f1mis))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/sklearn/metrics/classification.py:1143: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n"," 'precision', 'predicted', average, warn_for)\n"],"name":"stderr"},{"output_type":"stream","text":["Max Achievable Accuracy (Overall): 0.7428571428571429\n","Max Achievable Accuracy (Overall) on Threshold: 36\n","Max Achievable F-1 score Threshold: 19\n","Max Achievable Accuracy On This Threshold: 0.6571428571428571\n","Max Achievable F-1 score for Mispronunciations: 0.7209302325581395\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"02bhChZmEp3Z","colab_type":"code","outputId":"6ede0d14-13ed-4a78-e4b0-008759e5b256","executionInfo":{"status":"ok","timestamp":1550415967425,"user_tz":-300,"elapsed":1470,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["0.0001+0.00001*19"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.00029"]},"metadata":{"tags":[]},"execution_count":192}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"odgF6cpn6x5g","colab":{}},"source":["validationSetLength = 70 #initial some from test as validation set\n","threshold = 0.00029\n","\n","y_pred = [1 if autoencoder.evaluate(X_test[i:i+1],X_test[i:i+1],verbose=0)<=threshold\\\n"," else 0 for i in range(validationSetLength,X_test.shape[0])] "],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"0q1vQoQJ_rNz","colab_type":"code","outputId":"8fb87c6d-b889-43ec-ef61-8ebd168ef6c7","executionInfo":{"status":"ok","timestamp":1550415981067,"user_tz":-300,"elapsed":1060,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh3.googleusercontent.com/-ULUyTZ5_x-U/AAAAAAAAAAI/AAAAAAAAAAc/EGjTA3kFgyU/s64/photo.jpg","userId":"04817818996853836929"}},"colab":{"base_uri":"https://localhost:8080/","height":295}},"source":["print(\"Phone: \",phone)\n","print(classification_report(y_test[validationSetLength:], y_pred, target_names=['Mispronounced','Correct']))\n","print(confusion_matrix(y_test[validationSetLength:], y_pred, labels=range(2)))\n","\n","print(sklm.roc_curve(y_test[validationSetLength:],y_pred))\n","print('FAR = ', sklm.roc_curve(y_test[validationSetLength:],y_pred)[0][1]*100,'%')\n","print('FRR = ', (1 - sklm.roc_curve(y_test[validationSetLength:],y_pred)[1][1])*100,'%')\n","print(\"AUC_SCORE = \", sklm.roc_auc_score(y_test[validationSetLength:],y_pred)*100,'%')"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Phone: L\n"," precision recall f1-score support\n","\n","Mispronounced 0.65 0.92 0.76 90\n"," Correct 0.79 0.38 0.51 72\n","\n"," micro avg 0.68 0.68 0.68 162\n"," macro avg 0.72 0.65 0.64 162\n"," weighted avg 0.71 0.68 0.65 162\n","\n","[[83 7]\n"," [45 27]]\n","(array([0. , 0.07777778, 1. ]), array([0. , 0.375, 1. 
]), array([2, 1, 0]))\n","FAR = 7.777777777777778 %\n","FRR = 62.5 %\n","AUC_SCORE = 64.86111111111111 %\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"colab_type":"text","id":"jaUXp7GCA8Oj"},"source":["*CANNOT COMPARE RESULTS:* \n","The Interspeech one has a **different dataset** and shows results on phone level (as every phone is a separate class there)\n","\n","The l2-arctic one shows no results\n","\n","I am the first one to show results on this dataset"]}]} -------------------------------------------------------------------------------- /PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2.ipynb","version":"0.3.2","provenance":[{"file_id":"1tU9msAU_Mq8R6GCBRgoZAph_2_gXe3Vu","timestamp":1548609256683},{"file_id":"1P90PoWrDm5O2YkZmI6GKpf7xa63cf7qD","timestamp":1548422095551}],"collapsed_sections":["aXoZguY5b02w","PlZoSV-eb03b"]},"kernelspec":{"name":"python3","display_name":"Python 3"},"accelerator":"GPU"},"cells":[{"metadata":{"colab_type":"code","outputId":"4de7ba35-79d2-4107-c45c-f5dc22bfe370","executionInfo":{"status":"ok","timestamp":1554945986590,"user_tz":-300,"elapsed":35273,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"d1eEVQxriiEp","colab":{"base_uri":"https://localhost:8080/","height":124}},"cell_type":"code","source":["from google.colab import drive\n","drive.mount('./drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Go to this URL in a browser: 
https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n","\n","Enter your authorization code:\n","··········\n","Mounted at ./drive\n"],"name":"stdout"}]},{"metadata":{"id":"g2Osav3708xy","colab_type":"text"},"cell_type":"markdown","source":["Bism \n","# Imports"]},{"metadata":{"id":"Nz23OfFgoMDo","colab_type":"code","outputId":"dd7d1266-088a-40de-a078-92ca4b542bb6","executionInfo":{"status":"ok","timestamp":1554807397258,"user_tz":-300,"elapsed":5824,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["import pandas as pd\n","import numpy as np\n","import os\n","import IPython.display as ipd\n","import librosa\n","import matplotlib\n","import matplotlib.pyplot as plt\n","import seaborn as sns\n","%matplotlib inline\n","matplotlib.style.use('ggplot')\n","from tqdm import tqdm_notebook\n","\n","\n","#SKLEARN\n","from sklearn.preprocessing import LabelBinarizer\n","from sklearn.preprocessing import LabelEncoder\n","import sklearn.metrics as sklm\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import confusion_matrix\n","from sklearn.manifold import TSNE\n","\n","#KERAS\n","from keras import models\n","from keras.models import Model, load_model\n","from keras import callbacks\n","import keras\n","from keras.models import Sequential\n","from keras.layers import Input, Dense, Dropout, Flatten, BatchNormalization\n","from keras.layers import Conv2D, 
MaxPooling2D, UpSampling2D, Conv2DTranspose, AveragePooling2D\n","from keras.layers.advanced_activations import LeakyReLU\n","from keras.optimizers import SGD, Adam"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Using TensorFlow backend.\n"],"name":"stderr"}]},{"metadata":{"colab_type":"text","id":"-1MAYCR6b02u"},"cell_type":"markdown","source":["# Implementation 6 (PHONE LEVEL)\n","## Using L2 as Training set\n","## L2 as val+test set\n","## AUTOENCODER PER PHONE"]},{"metadata":{"colab_type":"text","id":"SdjP405Hb02v"},"cell_type":"markdown","source":["### Training"]},{"metadata":{"colab_type":"text","id":"9uVHuupBb03r"},"cell_type":"markdown","source":["#### MODEL"]},{"metadata":{"id":"9pjiynBVx18-","colab_type":"code","outputId":"857c7c39-96aa-44c8-9ec3-6c0d1eb25131","executionInfo":{"status":"ok","timestamp":1554789584917,"user_tz":-300,"elapsed":3249,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":50}},"cell_type":"code","source":["!ls './drive/My Drive/Thesis Work/PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2/'"],"execution_count":0,"outputs":[{"output_type":"stream","text":["phone_means test_labels.npy test_typeOfError.npy x_test.npy\n","phone_models test_phones.npy train_phones.npy x_train.npy\n"],"name":"stdout"}]},{"metadata":{"colab_type":"code","id":"1-vlcC4Lb038","colab":{}},"cell_type":"code","source":["x_trainALL = np.load('./drive/My Drive/Thesis Work/PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2/x_train.npy')\n","train_phones = np.load('./drive/My Drive/Thesis 
Work/PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2/train_phones.npy')"],"execution_count":0,"outputs":[]},{"metadata":{"id":"8P94PR51Y0w5","colab_type":"code","outputId":"f7fecb1b-b4ea-421b-bb7c-312aae36f66b","executionInfo":{"status":"ok","timestamp":1554789605427,"user_tz":-300,"elapsed":19841,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["train_phones.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(65113,)"]},"metadata":{"tags":[]},"execution_count":5}]},{"metadata":{"id":"wDGsRY3HNyjc","colab_type":"code","outputId":"81b01ee6-d8d3-4a4b-d263-f286cbeb99a3","executionInfo":{"status":"ok","timestamp":1554789605430,"user_tz":-300,"elapsed":19639,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["x_trainALL.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(65113, 13, 50, 3)"]},"metadata":{"tags":[]},"execution_count":6}]},{"metadata":{"id":"RcnBZK3XcqgT","colab_type":"code","colab":{}},"cell_type":"code","source":["#commented to avoid accidental runs\n","'''for phone in set(train_phones):\n"," np.save(\"./drive/My Drive/Thesis Work/Implementation6/phone_means/mean_\"+phone+\".npy\",\\\n"," np.max(np.abs(x_trainALL[train_phones == phone]),(0,1,2)))''';"],"execution_count":0,"outputs":[]},{"metadata":{"id":"FUndJmANsikt","colab_type":"code","outputId":"8408467f-63bf-4a70-86f7-0a99a73c4025","executionInfo":{"status":"ok","timestamp":1554789605437,"user_tz":-300,"elapsed":14755,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":54}},"cell_type":"code","source":["print(set(train_phones))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["{'AY', 'AH', 'K', 'TH', 'L', 'EH', 'IY', 'AA', 'AE', 'sil', 'sp', 'ER', 'AW', 'DH', 'OW', 'Z', 'UW', 'N', 'HH', 'AO', 'UH', 'IH', 'EY', 'V', 'S', 'D', 'JH', 'F', 'T', 's', 'P', 'Y', 'M', 'G', 'NG', 'OY', 'W', 'CH', 'ZH', 'B', 'SH', 'R'}\n"],"name":"stdout"}]},{"metadata":{"id":"nunEjrDyhRZa","colab_type":"code","colab":{}},"cell_type":"code","source":["phone = 'L'\n","train_max = np.load('./drive/My Drive/Thesis Work/PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2/phone_means/mean_'+phone+\".npy\")\n","x_train = x_trainALL[train_phones == phone] / train_max"],"execution_count":0,"outputs":[]},{"metadata":{"id":"NSR_aOY3Nl4f","colab_type":"code","outputId":"94a9c8c6-94cd-430b-a068-61da772aee44","executionInfo":{"status":"ok","timestamp":1554789610606,"user_tz":-300,"elapsed":856,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["x_train.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(2636, 13, 50, 3)"]},"metadata":{"tags":[]},"execution_count":11}]},{"metadata":{"id":"g8NCYflaOwmU","colab_type":"code","outputId":"2a352873-ffff-42d7-b1b6-0fb6b61ade6e","executionInfo":{"status":"ok","timestamp":1554789614552,"user_tz":-300,"elapsed":1074,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":50}},"cell_type":"code","source":["print(np.max(x_train))\n","print(np.min(x_train))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["0.7600943778948096\n","-1.0\n"],"name":"stdout"}]},{"metadata":{"id":"7u9ovJiBQumW","colab_type":"code","outputId":"3f752dac-874c-49f0-d011-cb67449eb9b2","executionInfo":{"status":"ok","timestamp":1554789639245,"user_tz":-300,"elapsed":997,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["np.nan in x_train"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["False"]},"metadata":{"tags":[]},"execution_count":13}]},{"metadata":{"id":"KCj2Lc4s852C","colab_type":"code","colab":{}},"cell_type":"code","source":["'''noise_factor = 0.5\n","x_train_noisy = x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape) \n","x_train_noisy = np.clip(x_train_noisy, -1, 1.)''';"],"execution_count":0,"outputs":[]},{"metadata":{"id":"TB5QGyRh98zc","colab_type":"code","colab":{}},"cell_type":"code","source":["#x_train_noisy.shape"],"execution_count":0,"outputs":[]},{"metadata":{"id":"TpGhM5-pYImQ","colab_type":"code","outputId":"630c6210-9386-40cc-d0bd-c35a800bbd7e","executionInfo":{"status":"ok","timestamp":1554789646776,"user_tz":-300,"elapsed":3407,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":87}},"cell_type":"code","source":["input_img = Input(shape=(13, 50, 3)) # adapt 
this if using `channels_first` image data format\n","\n","x = Conv2D(64, (2, 3), activation='tanh', padding='valid')(input_img)\n","x = MaxPooling2D((2, 2), padding='same')(x)\n","x = BatchNormalization()(x)\n","x = Conv2D(128, (3, 3), activation='tanh', padding='same')(x)\n","encoded = MaxPooling2D((2, 2), padding='same')(x)\n","\n","# at this point the representation is (3, 12, 128)\n","\n","x = BatchNormalization()(encoded)\n","x = Conv2D(128, (3, 3), activation='tanh', padding='same')(x)\n","x = UpSampling2D((2, 2))(x)\n","x = BatchNormalization()(x)\n","x = Conv2D(64, (3, 3), activation='tanh', padding='same')(x)\n","x = UpSampling2D((2, 2))(x)\n","x = BatchNormalization()(x)\n","decoded = Conv2DTranspose(3, (2, 3), activation='tanh', padding='valid')(x)\n","\n","autoencoder = Model(input_img, decoded)\n","\n","autoencoder.compile(optimizer= keras.optimizers.Adam(), loss='mean_squared_error');"],"execution_count":0,"outputs":[{"output_type":"stream","text":["WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n","Instructions for updating:\n","Colocations handled automatically by placer.\n"],"name":"stdout"}]},{"metadata":{"id":"puKPdi-KLQE0","colab_type":"code","outputId":"d8dc8809-ba59-43c3-cac6-762541e17f65","executionInfo":{"status":"ok","timestamp":1554789646778,"user_tz":-300,"elapsed":1643,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":605}},"cell_type":"code","source":["autoencoder.summary()"],"execution_count":0,"outputs":[{"output_type":"stream","text":["_________________________________________________________________\n","Layer (type) Output Shape Param # 
\n","=================================================================\n","input_1 (InputLayer) (None, 13, 50, 3) 0 \n","_________________________________________________________________\n","conv2d_1 (Conv2D) (None, 12, 48, 64) 1216 \n","_________________________________________________________________\n","max_pooling2d_1 (MaxPooling2 (None, 6, 24, 64) 0 \n","_________________________________________________________________\n","batch_normalization_1 (Batch (None, 6, 24, 64) 256 \n","_________________________________________________________________\n","conv2d_2 (Conv2D) (None, 6, 24, 128) 73856 \n","_________________________________________________________________\n","max_pooling2d_2 (MaxPooling2 (None, 3, 12, 128) 0 \n","_________________________________________________________________\n","batch_normalization_2 (Batch (None, 3, 12, 128) 512 \n","_________________________________________________________________\n","conv2d_3 (Conv2D) (None, 3, 12, 128) 147584 \n","_________________________________________________________________\n","up_sampling2d_1 (UpSampling2 (None, 6, 24, 128) 0 \n","_________________________________________________________________\n","batch_normalization_3 (Batch (None, 6, 24, 128) 512 \n","_________________________________________________________________\n","conv2d_4 (Conv2D) (None, 6, 24, 64) 73792 \n","_________________________________________________________________\n","up_sampling2d_2 (UpSampling2 (None, 12, 48, 64) 0 \n","_________________________________________________________________\n","batch_normalization_4 (Batch (None, 12, 48, 64) 256 \n","_________________________________________________________________\n","conv2d_transpose_1 (Conv2DTr (None, 13, 50, 3) 1155 \n","=================================================================\n","Total params: 299,139\n","Trainable params: 298,371\n","Non-trainable params: 
768\n","_________________________________________________________________\n"],"name":"stdout"}]},{"metadata":{"id":"13yviC13LEPj","colab_type":"code","outputId":"41b008b1-7a44-457c-e0ad-442cccb0e248","executionInfo":{"status":"ok","timestamp":1554790080019,"user_tz":-300,"elapsed":104133,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":4066}},"cell_type":"code","source":["callbacklist=[callbacks.History(),callbacks.ModelCheckpoint('./jawadmodel.h5', monitor='loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=5)]\n","autoencoder.fit(x_train, x_train, batch_size=256, epochs=100, callbacks=callbacklist)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Epoch 1/100\n","2636/2636 [==============================] - 1s 423us/step - loss: 4.5529e-04\n","Epoch 2/100\n","2636/2636 [==============================] - 1s 408us/step - loss: 4.8183e-04\n","Epoch 3/100\n","2636/2636 [==============================] - 1s 393us/step - loss: 4.5339e-04\n","Epoch 4/100\n","2636/2636 [==============================] - 1s 402us/step - loss: 4.7317e-04\n","Epoch 5/100\n","2636/2636 [==============================] - 1s 395us/step - loss: 5.0182e-04\n","\n","Epoch 00005: loss improved from inf to 0.00050, saving model to ./jawadmodel.h5\n","Epoch 6/100\n","2636/2636 [==============================] - 1s 393us/step - loss: 4.0902e-04\n","Epoch 7/100\n","2636/2636 [==============================] - 1s 392us/step - loss: 3.9429e-04\n","Epoch 8/100\n","2636/2636 [==============================] - 1s 387us/step - loss: 3.8985e-04\n","Epoch 9/100\n","2636/2636 [==============================] - 1s 386us/step - loss: 5.9057e-04\n","Epoch 10/100\n","2636/2636 [==============================] - 1s 386us/step - loss: 6.8567e-04\n","\n","Epoch 00010: loss did not 
improve from 0.00050\n","Epoch 11/100\n","2636/2636 [==============================] - 1s 381us/step - loss: 4.4325e-04\n","Epoch 12/100\n","2636/2636 [==============================] - 1s 383us/step - loss: 4.0735e-04\n","Epoch 13/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 3.9277e-04\n","Epoch 14/100\n","2636/2636 [==============================] - 1s 387us/step - loss: 3.9431e-04\n","Epoch 15/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 4.0883e-04\n","\n","Epoch 00015: loss improved from 0.00050 to 0.00041, saving model to ./jawadmodel.h5\n","Epoch 16/100\n","2636/2636 [==============================] - 1s 383us/step - loss: 3.9292e-04\n","Epoch 17/100\n","2636/2636 [==============================] - 1s 386us/step - loss: 4.2691e-04\n","Epoch 18/100\n","2636/2636 [==============================] - 1s 392us/step - loss: 4.4292e-04\n","Epoch 19/100\n","2636/2636 [==============================] - 1s 392us/step - loss: 6.5831e-04\n","Epoch 20/100\n","2636/2636 [==============================] - 1s 385us/step - loss: 4.2406e-04\n","\n","Epoch 00020: loss did not improve from 0.00041\n","Epoch 21/100\n","2636/2636 [==============================] - 1s 389us/step - loss: 3.9462e-04\n","Epoch 22/100\n","2636/2636 [==============================] - 1s 392us/step - loss: 3.6826e-04\n","Epoch 23/100\n","2636/2636 [==============================] - 1s 394us/step - loss: 3.8392e-04\n","Epoch 24/100\n","2636/2636 [==============================] - 1s 389us/step - loss: 4.1061e-04\n","Epoch 25/100\n","2636/2636 [==============================] - 1s 387us/step - loss: 4.2223e-04\n","\n","Epoch 00025: loss did not improve from 0.00041\n","Epoch 26/100\n","2636/2636 [==============================] - 1s 387us/step - loss: 3.8096e-04\n","Epoch 27/100\n","2636/2636 [==============================] - 1s 391us/step - loss: 3.7936e-04\n","Epoch 28/100\n","2636/2636 [==============================] - 1s 390us/step - loss: 
4.1034e-04\n","Epoch 29/100\n","2636/2636 [==============================] - 1s 389us/step - loss: 4.1414e-04\n","Epoch 30/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 3.7368e-04\n","\n","Epoch 00030: loss improved from 0.00041 to 0.00037, saving model to ./jawadmodel.h5\n","Epoch 31/100\n","2636/2636 [==============================] - 1s 391us/step - loss: 6.1722e-04\n","Epoch 32/100\n","2636/2636 [==============================] - 1s 387us/step - loss: 4.2307e-04\n","Epoch 33/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 4.5759e-04\n","Epoch 34/100\n","2636/2636 [==============================] - 1s 399us/step - loss: 4.0256e-04\n","Epoch 35/100\n","2636/2636 [==============================] - 1s 393us/step - loss: 4.1917e-04\n","\n","Epoch 00035: loss did not improve from 0.00037\n","Epoch 36/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 3.8548e-04\n","Epoch 37/100\n","2636/2636 [==============================] - 1s 386us/step - loss: 4.4969e-04\n","Epoch 38/100\n","2636/2636 [==============================] - 1s 394us/step - loss: 3.9412e-04\n","Epoch 39/100\n","2636/2636 [==============================] - 1s 386us/step - loss: 4.3662e-04\n","Epoch 40/100\n","2636/2636 [==============================] - 1s 392us/step - loss: 3.7585e-04\n","\n","Epoch 00040: loss did not improve from 0.00037\n","Epoch 41/100\n","2636/2636 [==============================] - 1s 389us/step - loss: 3.6784e-04\n","Epoch 42/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 3.5089e-04\n","Epoch 43/100\n","2636/2636 [==============================] - 1s 392us/step - loss: 4.7467e-04\n","Epoch 44/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 4.8709e-04\n","Epoch 45/100\n","2636/2636 [==============================] - 1s 390us/step - loss: 4.1252e-04\n","\n","Epoch 00045: loss did not improve from 0.00037\n","Epoch 46/100\n","2636/2636 
[==============================] - 1s 394us/step - loss: 3.7055e-04\n","Epoch 47/100\n","2636/2636 [==============================] - 1s 390us/step - loss: 3.7615e-04\n","Epoch 48/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 3.8952e-04\n","Epoch 49/100\n","2636/2636 [==============================] - 1s 391us/step - loss: 3.6118e-04\n","Epoch 50/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 5.0879e-04\n","\n","Epoch 00050: loss did not improve from 0.00037\n","Epoch 51/100\n","2636/2636 [==============================] - 1s 387us/step - loss: 4.9170e-04\n","Epoch 52/100\n","2636/2636 [==============================] - 1s 396us/step - loss: 3.9235e-04\n","Epoch 53/100\n","2636/2636 [==============================] - 1s 387us/step - loss: 3.6524e-04\n","Epoch 54/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 3.6848e-04\n","Epoch 55/100\n","2636/2636 [==============================] - 1s 391us/step - loss: 3.5537e-04\n","\n","Epoch 00055: loss improved from 0.00037 to 0.00036, saving model to ./jawadmodel.h5\n","Epoch 56/100\n","2636/2636 [==============================] - 1s 386us/step - loss: 3.7752e-04\n","Epoch 57/100\n","2636/2636 [==============================] - 1s 384us/step - loss: 5.3285e-04\n","Epoch 58/100\n","2636/2636 [==============================] - 1s 390us/step - loss: 3.9202e-04\n","Epoch 59/100\n","2636/2636 [==============================] - 1s 389us/step - loss: 3.6159e-04\n","Epoch 60/100\n","2636/2636 [==============================] - 1s 393us/step - loss: 3.6150e-04\n","\n","Epoch 00060: loss did not improve from 0.00036\n","Epoch 61/100\n","2636/2636 [==============================] - 1s 394us/step - loss: 3.4502e-04\n","Epoch 62/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 3.7025e-04\n","Epoch 63/100\n","2636/2636 [==============================] - 1s 389us/step - loss: 4.3412e-04\n","Epoch 64/100\n","2636/2636 
[==============================] - 1s 388us/step - loss: 3.6956e-04\n","Epoch 65/100\n","2636/2636 [==============================] - 1s 386us/step - loss: 3.4788e-04\n","\n","Epoch 00065: loss improved from 0.00036 to 0.00035, saving model to ./jawadmodel.h5\n","Epoch 66/100\n","2636/2636 [==============================] - 1s 387us/step - loss: 3.8274e-04\n","Epoch 67/100\n","2636/2636 [==============================] - 1s 386us/step - loss: 4.4012e-04\n","Epoch 68/100\n","2636/2636 [==============================] - 1s 387us/step - loss: 3.9766e-04\n","Epoch 69/100\n","2636/2636 [==============================] - 1s 385us/step - loss: 3.4174e-04\n","Epoch 70/100\n","2636/2636 [==============================] - 1s 384us/step - loss: 3.9654e-04\n","\n","Epoch 00070: loss did not improve from 0.00035\n","Epoch 71/100\n","2636/2636 [==============================] - 1s 387us/step - loss: 4.9054e-04\n","Epoch 72/100\n","2636/2636 [==============================] - 1s 385us/step - loss: 3.8945e-04\n","Epoch 73/100\n","2636/2636 [==============================] - 1s 394us/step - loss: 3.4033e-04\n","Epoch 74/100\n","2636/2636 [==============================] - 1s 386us/step - loss: 3.5420e-04\n","Epoch 75/100\n","2636/2636 [==============================] - 1s 386us/step - loss: 3.5999e-04\n","\n","Epoch 00075: loss did not improve from 0.00035\n","Epoch 76/100\n","2636/2636 [==============================] - 1s 394us/step - loss: 4.0535e-04\n","Epoch 77/100\n","2636/2636 [==============================] - 1s 394us/step - loss: 3.8627e-04\n","Epoch 78/100\n","2636/2636 [==============================] - 1s 395us/step - loss: 3.9162e-04\n","Epoch 79/100\n","2636/2636 [==============================] - 1s 393us/step - loss: 3.6523e-04\n","Epoch 80/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 4.0751e-04\n","\n","Epoch 00080: loss did not improve from 0.00035\n","Epoch 81/100\n","2636/2636 [==============================] - 1s 388us/step - 
loss: 3.5909e-04\n","Epoch 82/100\n","2636/2636 [==============================] - 1s 391us/step - loss: 3.7340e-04\n","Epoch 83/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 3.6224e-04\n","Epoch 84/100\n","2636/2636 [==============================] - 1s 385us/step - loss: 3.7237e-04\n","Epoch 85/100\n","2636/2636 [==============================] - 1s 387us/step - loss: 3.7916e-04\n","\n","Epoch 00085: loss did not improve from 0.00035\n","Epoch 86/100\n","2636/2636 [==============================] - 1s 385us/step - loss: 4.1378e-04\n","Epoch 87/100\n","2636/2636 [==============================] - 1s 392us/step - loss: 3.4864e-04\n","Epoch 88/100\n","2636/2636 [==============================] - 1s 391us/step - loss: 4.2020e-04\n","Epoch 89/100\n","2636/2636 [==============================] - 1s 385us/step - loss: 3.7305e-04\n","Epoch 90/100\n","2636/2636 [==============================] - 1s 391us/step - loss: 3.3683e-04\n","\n","Epoch 00090: loss improved from 0.00035 to 0.00034, saving model to ./jawadmodel.h5\n","Epoch 91/100\n","2636/2636 [==============================] - 1s 387us/step - loss: 3.2529e-04\n","Epoch 92/100\n","2636/2636 [==============================] - 1s 384us/step - loss: 4.9510e-04\n","Epoch 93/100\n","2636/2636 [==============================] - 1s 383us/step - loss: 5.1318e-04\n","Epoch 94/100\n","2636/2636 [==============================] - 1s 385us/step - loss: 3.6971e-04\n","Epoch 95/100\n","2636/2636 [==============================] - 1s 386us/step - loss: 3.6328e-04\n","\n","Epoch 00095: loss did not improve from 0.00034\n","Epoch 96/100\n","2636/2636 [==============================] - 1s 388us/step - loss: 3.3249e-04\n","Epoch 97/100\n","2636/2636 [==============================] - 1s 385us/step - loss: 3.2387e-04\n","Epoch 98/100\n","2636/2636 [==============================] - 1s 384us/step - loss: 3.3981e-04\n","Epoch 99/100\n","2636/2636 [==============================] - 1s 386us/step - loss: 
3.7304e-04\n","Epoch 100/100\n","2636/2636 [==============================] - 1s 394us/step - loss: 3.9254e-04\n","\n","Epoch 00100: loss did not improve from 0.00034\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":21}]},{"metadata":{"id":"-iAV6rfs4Dq-","colab_type":"code","outputId":"ede1de4e-31bd-4d04-e353-509ba51df91c","executionInfo":{"status":"ok","timestamp":1554793166164,"user_tz":-300,"elapsed":2405,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["modelLoss = _.history['loss'][-1]\n","modelLoss"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.00039253752102385724"]},"metadata":{"tags":[]},"execution_count":22}]},{"metadata":{"id":"Y__-HEGx5dzC","colab_type":"code","outputId":"d62b6a31-a684-4ac6-8206-4e62d3c2ed5c","executionInfo":{"status":"ok","timestamp":1554793169768,"user_tz":-300,"elapsed":1556,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["phone"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["'L'"]},"metadata":{"tags":[]},"execution_count":23}]},{"metadata":{"id":"GjKD1mDybdML","colab_type":"code","colab":{}},"cell_type":"code","source":["!cp ./jawadmodel.h5 \"./drive/My Drive/Thesis Work/PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2/phone_models/\"$phone\".h5\""],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"text","id":"_PSH9Rrwb04P"},"cell_type":"markdown","source":["### 
TESTING"]},{"metadata":{"colab_type":"code","outputId":"b605ee30-b435-41a4-c1d7-aa79617c1df6","executionInfo":{"status":"ok","timestamp":1554793178117,"user_tz":-300,"elapsed":3959,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"0fEs9WhE6x5G","colab":{"base_uri":"https://localhost:8080/","height":50}},"cell_type":"code","source":["!ls './drive/My Drive/Thesis Work/PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2'"],"execution_count":0,"outputs":[{"output_type":"stream","text":["phone_means test_labels.npy test_typeOfError.npy x_test.npy\n","phone_models test_phones.npy train_phones.npy x_train.npy\n"],"name":"stdout"}]},{"metadata":{"colab_type":"code","id":"HGgx_Vmx6x5K","colab":{}},"cell_type":"code","source":["x_testALL = np.load('./drive/My Drive/Thesis Work/PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2/x_test.npy')\n","test_phones = np.load('./drive/My Drive/Thesis Work/PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2/test_phones.npy')\n","test_labelsALL = np.load('./drive/My Drive/Thesis Work/PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2/test_labels.npy')"],"execution_count":0,"outputs":[]},{"metadata":{"id":"WWJBMRRy1R9y","colab_type":"code","outputId":"26d261a0-af27-43f2-f70e-dbe70e6cfdd7","executionInfo":{"status":"ok","timestamp":1554793189514,"user_tz":-300,"elapsed":14575,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["phone"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["'L'"]},"metadata":{"tags":[]},"execution_count":27}]},{"metadata":{"colab_type":"code","id":"61R7miB26x5M","colab":{}},"cell_type":"code","source":["#MAX normalization\n","train_max = np.load('./drive/My 
Drive/Thesis Work/PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2/phone_means/mean_'+phone+\".npy\")\n","x_test = x_testALL[test_phones == phone] / train_max\n","test_labels = test_labelsALL[test_phones == phone]"],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"code","outputId":"829afb49-b0b3-4efa-b0f9-cf95fd438c17","executionInfo":{"status":"ok","timestamp":1554793189519,"user_tz":-300,"elapsed":6966,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"zUUSaNfp6x5V","colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["x_test.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(1012, 13, 50, 3)"]},"metadata":{"tags":[]},"execution_count":29}]},{"metadata":{"id":"y08CFLq77i1N","colab_type":"code","outputId":"e6320c1c-9ed7-4a04-a245-d397fccfd48b","executionInfo":{"status":"ok","timestamp":1554793189521,"user_tz":-300,"elapsed":6599,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["test_labels.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(1012,)"]},"metadata":{"tags":[]},"execution_count":30}]},{"metadata":{"id":"GrxKoD7C_h4g","colab_type":"code","colab":{}},"cell_type":"code","source":["#x_test = np.vstack((x_test[test_labels == 1][:sum(test_labels == 0)],x_test[test_labels == 0])) #have to permute this also"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Tje3y8KDCT77","colab_type":"code","colab":{}},"cell_type":"code","source":["#test_labels = np.vstack((test_labels[test_labels == 1][:sum(test_labels == 0)],test_labels[test_labels == 0])) #have to permute this 
also"],"execution_count":0,"outputs":[]},{"metadata":{"id":"lIbxXmlBCxUO","colab_type":"code","colab":{}},"cell_type":"code","source":["#test_labels = test_labels.reshape(-1)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"NsbhuryoCOYG","colab_type":"code","colab":{}},"cell_type":"code","source":["#shuffledIndexes = np.random.permutation(range(x_test.shape[0]))\n","#x_test = x_test[shuffledIndexes]\n","#test_labels = test_labels[shuffledIndexes]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"jZFkKHEz-sgO","colab_type":"code","colab":{}},"cell_type":"code","source":["'''noise_factor = 0.5\n","x_test_noisy = x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape) \n","x_test_noisy = np.clip(x_test_noisy, -1, 1)''';"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Y_wyvIyNNgGv","colab_type":"code","colab":{}},"cell_type":"code","source":["autoencoder = load_model('./drive/My Drive/Thesis Work/PhoneLevel_Autoencoder_MFCC_TrainL2_TestL2/phone_models/'+phone+'.h5') #load previous one without noise"],"execution_count":0,"outputs":[]},{"metadata":{"id":"DcSsvupx7_ks","colab_type":"code","outputId":"ee0bd557-7897-4f95-a0e2-f058057710cc","executionInfo":{"status":"ok","timestamp":1554793194813,"user_tz":-300,"elapsed":6425,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":50}},"cell_type":"code","source":["autoencoder.evaluate(x_test[test_labels == 1],x_test[test_labels == 1],batch_size=1)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["912/912 [==============================] - 3s 
3ms/step\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["0.0004416479345341111"]},"metadata":{"tags":[]},"execution_count":32}]},{"metadata":{"id":"HcbgxRQb8Jm3","colab_type":"code","outputId":"97d97e32-15ba-40f6-9b99-eb79c4445a83","executionInfo":{"status":"ok","timestamp":1554793194816,"user_tz":-300,"elapsed":6194,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":50}},"cell_type":"code","source":["autoencoder.evaluate(x_test[test_labels == 0],x_test[test_labels == 0])"],"execution_count":0,"outputs":[{"output_type":"stream","text":["100/100 [==============================] - 0s 902us/step\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["0.00046363551286049187"]},"metadata":{"tags":[]},"execution_count":33}]},{"metadata":{"id":"-CKVss601kio","colab_type":"code","outputId":"843e0e75-75cc-4f22-e980-857fc4cc639a","executionInfo":{"status":"ok","timestamp":1554793199718,"user_tz":-300,"elapsed":1099,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["x_test.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(1012, 13, 50, 3)"]},"metadata":{"tags":[]},"execution_count":34}]},{"metadata":{"colab_type":"code","outputId":"fbfbab95-caa2-4836-d703-90f077e5cb90","executionInfo":{"status":"ok","timestamp":1554793246976,"user_tz":-300,"elapsed":37942,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"HfysdAru6x5b","colab":{"base_uri":"https://localhost:8080/","height":154}},"cell_type":"code","source":["validationSetLength = 150 #initial some from test as validation set\n","accs = []\n","f1mis = []\n","\n","for threshold in np.arange(0.0000,0.00100,0.00001):\n"," predictions = [1 if autoencoder.evaluate(x_test[i:i+1],x_test[i:i+1],verbose=0)<=threshold\\\n"," else 0 for i in range(validationSetLength)]\n"," \n"," accs.append(sklm.accuracy_score(test_labels[:validationSetLength],predictions))\n"," f1mis.append(sklm.precision_recall_fscore_support(test_labels[:validationSetLength],predictions)[2][0])\n","\n","print(\"Max Achievable Accuracy (Overall): \", max(accs))\n","print(\"Max Achievable Accuracy (Overall) on Threshold: \", np.argmax(accs))\n","\n","print(\"Max Achievable F-1 score Threshold: \", np.argmax(f1mis))\n","print(\"Max Achievable Accuracy On This Threshold: \", accs[np.argmax(f1mis)])\n","print(\"Max Achievable F-1 score for Mispronunciations: \", max(f1mis))\n"],"execution_count":0,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/sklearn/metrics/classification.py:1143: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n"," 'precision', 'predicted', average, warn_for)\n"],"name":"stderr"},{"output_type":"stream","text":["Max Achievable Accuracy (Overall): 0.8\n","Max Achievable Accuracy (Overall) on Threshold: 89\n","Max Achievable F-1 score Threshold: 25\n","Max Achievable Accuracy On This Threshold: 0.19333333333333333\n","Max Achievable F-1 score for Mispronunciations: 
0.3163841807909605\n"],"name":"stdout"}]},{"metadata":{"id":"02bhChZmEp3Z","colab_type":"code","outputId":"9a1c91f8-fd3c-47f7-cdc6-ea15f02a106c","executionInfo":{"status":"ok","timestamp":1554793392118,"user_tz":-300,"elapsed":896,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["0.0000+0.00001*25"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.00025"]},"metadata":{"tags":[]},"execution_count":36}]},{"metadata":{"colab_type":"code","id":"odgF6cpn6x5g","colab":{}},"cell_type":"code","source":["validationSetLength = 150 #initial some from test as validation set\n","threshold = 0.00025\n","\n","predictions = [1 if autoencoder.evaluate(x_test[i:i+1],x_test[i:i+1],verbose=0)<=threshold\\\n"," else 0 for i in range(validationSetLength,x_test.shape[0])] "],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"code","outputId":"f1a8d0c9-25f5-4a6a-e167-58666ca7ee33","executionInfo":{"status":"ok","timestamp":1554793401378,"user_tz":-300,"elapsed":607,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"4aOPssIK6x5i","colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["print('Accuracy = ', sklm.accuracy_score(test_labels[validationSetLength:],predictions))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Accuracy = 0.09164733178654293\n"],"name":"stdout"}]},{"metadata":{"id":"o8HhziiE2nIk","colab_type":"code","colab":{}},"cell_type":"code","source":["y_test = test_labels[validationSetLength:]\n","y_pred = 
predictions"],"execution_count":0,"outputs":[]},{"metadata":{"id":"0q1vQoQJ_rNz","colab_type":"code","outputId":"073c95bd-60e6-4bac-ca05-75e7609fc02e","executionInfo":{"status":"ok","timestamp":1554793404068,"user_tz":-300,"elapsed":1034,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":286}},"cell_type":"code","source":["print(\"Phone: \",phone)\n","print(classification_report(y_test, y_pred, target_names=['Mispronounced','Correct']))\n","print(confusion_matrix(y_test, y_pred, labels=range(2)))\n","\n","print(sklm.roc_curve(y_test,y_pred))\n","print('FAR = ', sklm.roc_curve(y_test,y_pred)[0][1]*100,'%')\n","print('FRR = ', (1 - sklm.roc_curve(y_test,y_pred)[1][1])*100,'%')\n","print(\"AUC_SCORE = \", sklm.roc_auc_score(y_test,y_pred)*100,'%')"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Phone: L\n"," precision recall f1-score support\n","\n","Mispronounced 0.08 1.00 0.16 72\n"," Correct 1.00 0.01 0.02 790\n","\n"," micro avg 0.09 0.09 0.09 862\n"," macro avg 0.54 0.50 0.09 862\n"," weighted avg 0.92 0.09 0.03 862\n","\n","[[ 72 0]\n"," [783 7]]\n","(array([0., 0., 1.]), array([0. , 0.00886076, 1. 
]), array([2, 1, 0]))\n","FAR = 0.0 %\n","FRR = 99.1139240506329 %\n","AUC_SCORE = 50.44303797468355 %\n"],"name":"stdout"}]},{"metadata":{"id":"PHCbLEBIOj8F","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /PhoneLevel_OCSVM_LogfBank_TrainCamb_TestCamb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"PhoneLevel_OCSVM_LogfBank_TrainCamb_TestCamb.ipynb","version":"0.3.2","provenance":[{"file_id":"1do_oe2nEvRd-R2ERrv85HTNXRn2Hsmqm","timestamp":1550321114470},{"file_id":"1aI0C9I0wveduUNsjU0XAT4bgEIxvEpr6","timestamp":1550077278747}],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"colab_type":"code","outputId":"1f547509-4436-4fd6-a8ac-ed7881f454f2","id":"d1eEVQxriiEp","executionInfo":{"status":"ok","timestamp":1551147828189,"user_tz":-300,"elapsed":68798,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":124}},"cell_type":"code","source":["from google.colab import drive\n","drive.mount('./drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n","\n","Enter your authorization code:\n","··········\n","Mounted at 
./drive\n"],"name":"stdout"}]},{"metadata":{"id":"g2Osav3708xy","colab_type":"text"},"cell_type":"markdown","source":["Bism \n","# Imports"]},{"metadata":{"id":"j0MMAqy-tdOR","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":176},"outputId":"2c447f06-8f3b-49c2-8405-14f59dcd3111","executionInfo":{"status":"ok","timestamp":1551147843535,"user_tz":-300,"elapsed":7055,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}}},"cell_type":"code","source":["!pip install python_speech_features"],"execution_count":3,"outputs":[{"output_type":"stream","text":["Collecting python_speech_features\n"," Downloading https://files.pythonhosted.org/packages/ff/d1/94c59e20a2631985fbd2124c45177abaa9e0a4eee8ba8a305aa26fc02a8e/python_speech_features-0.6.tar.gz\n","Building wheels for collected packages: python-speech-features\n"," Building wheel for python-speech-features (setup.py) ... 
\u001b[?25ldone\n","\u001b[?25h Stored in directory: /root/.cache/pip/wheels/3c/42/7c/f60e9d1b40015cd69b213ad90f7c18a9264cd745b9888134be\n","Successfully built python-speech-features\n","Installing collected packages: python-speech-features\n","Successfully installed python-speech-features-0.6\n"],"name":"stdout"}]},{"metadata":{"id":"Nz23OfFgoMDo","colab_type":"code","colab":{}},"cell_type":"code","source":["import numpy as np\n","from tqdm import tqdm_notebook\n","import matplotlib.pyplot as plt\n","import os\n","\n","import pandas as pd\n","\n","#SKLEARN\n","from sklearn.model_selection import train_test_split\n","from sklearn.model_selection import GridSearchCV\n","from sklearn import svm\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import confusion_matrix\n","from sklearn.decomposition import PCA\n","import sklearn.metrics as sklm\n","from sklearn.manifold import TSNE\n","\n","from python_speech_features import delta"],"execution_count":0,"outputs":[]},{"metadata":{"id":"MAx3DP-2tT2q","colab_type":"code","colab":{}},"cell_type":"code","source":["train_featureDf = pd.read_pickle(\"./drive/My Drive/Thesis Work/monoPhoneDfCambridgeLogfBankE.pickle\")\n","#train_featureDf['deltaC'] = train_featureDf.logfBankE.apply(delta,N = 2)\n","#train_featureDf['doubleDeltaC'] = train_featureDf.deltaC.apply(delta, N=2)\n","\n","#remove stresses\n","train_featureDf.phones = [''.join([alpha for alpha in phone if alpha.isalpha()]) for phone in train_featureDf.phones]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"syX6BZUeuQJY","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":202},"outputId":"16a12d25-8c84-4ba1-b7c4-16cc9733ffec","executionInfo":{"status":"ok","timestamp":1551148784768,"user_tz":-300,"elapsed":979,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}}},"cell_type":"code","source":["train_featureDf.head()"],"execution_count":76,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
phoneslogfBankE
0EY[[-11.553039339574958, -11.62713318601026, -11...
1sp[[-16.980120121163033, -13.952385774196047, -1...
2sil[[-16.473187996012438, -14.6359224670427, -13....
3AH[[-8.260808385742848, -6.746990707037258, -8.6...
4B[[-7.607606722448552, -8.118466678578923, -7.7...
\n","
"],"text/plain":[" phones logfBankE\n","0 EY [[-11.553039339574958, -11.62713318601026, -11...\n","1 sp [[-16.980120121163033, -13.952385774196047, -1...\n","2 sil [[-16.473187996012438, -14.6359224670427, -13....\n","3 AH [[-8.260808385742848, -6.746990707037258, -8.6...\n","4 B [[-7.607606722448552, -8.118466678578923, -7.7..."]},"metadata":{"tags":[]},"execution_count":76}]},{"metadata":{"id":"1rRYEjaHuUkB","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":35},"outputId":"85f5cc1b-6ec8-4d66-b118-4699b3758be8","executionInfo":{"status":"ok","timestamp":1551148788619,"user_tz":-300,"elapsed":761,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}}},"cell_type":"code","source":["train_featureDf.phones.nunique() #42 phones"],"execution_count":77,"outputs":[{"output_type":"execute_result","data":{"text/plain":["42"]},"metadata":{"tags":[]},"execution_count":77}]},{"metadata":{"id":"Nyde0JguubLs","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":35},"outputId":"bab5759c-a414-4fdc-cc24-fc210de95d3b","executionInfo":{"status":"ok","timestamp":1551149202817,"user_tz":-300,"elapsed":2819,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}}},"cell_type":"code","source":["#x_train = np.array((train_featureDf.logfBankE,train_featureDf.deltaC,train_featureDf.doubleDeltaC))\n","x_train = np.array(list(train_featureDf.logfBankE.values))\n","x_train.shape"],"execution_count":94,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(20794, 57, 26)"]},"metadata":{"tags":[]},"execution_count":94}]},{"metadata":{"id":"xPXGmalbvjh-","colab_type":"code","colab":{}},"cell_type":"code","source":["#x_train = 
np.transpose(x_train,[1,2,3,0])\n","#x_train.shape"],"execution_count":0,"outputs":[]},{"metadata":{"id":"vC_5B989vvNw","colab_type":"code","colab":{}},"cell_type":"code","source":["for phone in set(train_featureDf.phones):\n"," np.save(\"./drive/My Drive/Thesis Work/onlyLog Phones/\"+phone+\".npy\",x_train[train_featureDf.phones == phone])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"iH4ZJX74oPWC","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":104},"outputId":"da0c039d-8bde-4e7d-e8b1-b6676d8cf576","executionInfo":{"status":"ok","timestamp":1551149234257,"user_tz":-300,"elapsed":3826,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}}},"cell_type":"code","source":["!ls \"./drive/My Drive/Thesis Work/onlyLog Phones/\""],"execution_count":98,"outputs":[{"output_type":"stream","text":["AA.npy\tAY.npy\tEH.npy\tHH.npy\tL.npy\tOY.npy\t S.npy\t UH.npy ZH.npy\n","AE.npy\tB.npy\tER.npy\tIH.npy\tM.npy\tP.npy\t spn.npy UW.npy Z.npy\n","AH.npy\tCH.npy\tEY.npy\tIY.npy\tNG.npy\tR.npy\t sp.npy V.npy\n","AO.npy\tDH.npy\tF.npy\tJH.npy\tN.npy\tSH.npy\t TH.npy W.npy\n","AW.npy\tD.npy\tG.npy\tK.npy\tOW.npy\tsil.npy T.npy\t Y.npy\n"],"name":"stdout"}]},{"metadata":{"id":"OJZ-97pDxU_y","colab_type":"text"},"cell_type":"markdown","source":["# Check LogFBankEnergies"]},{"metadata":{"id":"_dUGpFu4fOfK","colab_type":"code","outputId":"207c218d-be97-4278-b203-a44f60dead9b","executionInfo":{"status":"ok","timestamp":1551149237331,"user_tz":-300,"elapsed":772,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["phones = [file[:-4] for file in os.listdir('./drive/My Drive/Thesis Work/onlyLog 
Phones/')]\n","len(phones)"],"execution_count":99,"outputs":[{"output_type":"execute_result","data":{"text/plain":["42"]},"metadata":{"tags":[]},"execution_count":99}]},{"metadata":{"id":"QuqcCAIQgbro","colab_type":"code","colab":{}},"cell_type":"code","source":["phone = 'L'"],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"text","id":"SdjP405Hb02v"},"cell_type":"markdown","source":["### Training"]},{"metadata":{"id":"uP13eGWHYoaF","colab_type":"text"},"cell_type":"markdown","source":["#### X_TRAIN, X_TEST & Y_TEST"]},{"metadata":{"id":"atqHhNsiY1tU","colab_type":"code","outputId":"7b3b6395-20e6-41d7-ffed-692a52dba8aa","executionInfo":{"status":"ok","timestamp":1551149252481,"user_tz":-300,"elapsed":820,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["X = np.load('./drive/My Drive/Thesis Work/onlyLog Phones/'+phone+'.npy')\n","X = X.reshape(X.shape[0],-1)\n","X.shape"],"execution_count":103,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(1093, 1482)"]},"metadata":{"tags":[]},"execution_count":103}]},{"metadata":{"id":"NPqtLRyekdek","colab_type":"text"},"cell_type":"markdown","source":["90% for Train"]},{"metadata":{"id":"V6rS1xdalKtU","colab_type":"code","colab":{}},"cell_type":"code","source":["np.random.shuffle(X)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"ntXsCyOYkaGd","colab_type":"code","outputId":"5326b7eb-95dd-462c-bb40-a5eb02cfdc12","executionInfo":{"status":"ok","timestamp":1551149253799,"user_tz":-300,"elapsed":870,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["X_train = 
X[:int(np.ceil(X.shape[0]*0.9))]\n","X_test = X[int(np.ceil(X.shape[0]*0.9)):]\n","y_test = np.ones(X_test.shape[0])\n","X_train.shape[0]+X_test.shape[0]"],"execution_count":105,"outputs":[{"output_type":"execute_result","data":{"text/plain":["1093"]},"metadata":{"tags":[]},"execution_count":105}]},{"metadata":{"id":"aBDWvchTmNFk","colab_type":"code","outputId":"50ed36c2-ffdf-4a5f-f62b-b9e7a48fa70e","executionInfo":{"status":"ok","timestamp":1551149254593,"user_tz":-300,"elapsed":849,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["y_test.shape"],"execution_count":106,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(109,)"]},"metadata":{"tags":[]},"execution_count":106}]},{"metadata":{"id":"jAICZXldmhEj","colab_type":"code","colab":{}},"cell_type":"code","source":["for p in phones:\n"," if p != phone:\n"," notPhone = np.load('./drive/My Drive/Thesis Work/onlyLog Phones/'+p+'.npy')\n"," notPhone = notPhone.reshape(notPhone.shape[0],-1)\n"," np.random.shuffle(notPhone)\n"," X_test = np.vstack((X_test,notPhone[:int(np.ceil(y_test.shape[0]/41))]))"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Y5HgVmAfpJ7g","colab_type":"code","outputId":"c98012ba-eb88-4fcd-9b12-5e6d82adcea5","executionInfo":{"status":"ok","timestamp":1551149270197,"user_tz":-300,"elapsed":781,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["y_test = 
np.hstack((y_test,np.zeros(int(np.ceil(y_test.shape[0]/41)*41))))\n","y_test.shape"],"execution_count":109,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(232,)"]},"metadata":{"tags":[]},"execution_count":109}]},{"metadata":{"id":"xV6AXrh1phQ6","colab_type":"code","outputId":"218e48d0-53e9-47e8-8d0b-fd2d7cfd10cd","executionInfo":{"status":"ok","timestamp":1551149272778,"user_tz":-300,"elapsed":777,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":260}},"cell_type":"code","source":["y_test"],"execution_count":110,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n"," 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n"," 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n"," 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n"," 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n"," 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n"," 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n"," 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n"," 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n"," 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n"," 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n"," 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n"," 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n"," 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 
0.])"]},"metadata":{"tags":[]},"execution_count":110}]},{"metadata":{"id":"F2BW-Mtvoccd","colab_type":"code","outputId":"c7c10160-43bb-44e8-d6b9-13a543e6c54c","executionInfo":{"status":"ok","timestamp":1551149275638,"user_tz":-300,"elapsed":1421,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["X_test.shape"],"execution_count":111,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(232, 1482)"]},"metadata":{"tags":[]},"execution_count":111}]},{"metadata":{"id":"aABYxtwYnLWo","colab_type":"code","outputId":"9acb10a9-02ce-40b7-f0f1-77b1a369b02a","executionInfo":{"status":"ok","timestamp":1551149275640,"user_tz":-300,"elapsed":1281,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["X_train.shape"],"execution_count":112,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(984, 1482)"]},"metadata":{"tags":[]},"execution_count":112}]},{"metadata":{"id":"nrlW8-ZX8mHr","colab_type":"text"},"cell_type":"markdown","source":["#### Visualising the features"]},{"metadata":{"id":"5YZ0th9wx1Qy","colab_type":"code","colab":{}},"cell_type":"code","source":["def plotTrainTestScatter(phone, phones):\n"," print(\"Phone: \",phone)\n"," X = np.load('./drive/My Drive/Thesis Work/Log Phones/'+phone+'.npy')\n"," X = X.reshape(X.shape[0],-1)\n"," np.random.shuffle(X)\n"," \n"," X_train = X[:int(np.ceil(X.shape[0]*0.9))]\n"," X_test = X[int(np.ceil(X.shape[0]*0.9)):]\n"," y_test = np.ones(X_test.shape[0])\n"," \n"," for p in phones:\n"," if p != phone:\n"," notPhone = np.load('./drive/My Drive/Thesis Work/Log 
Phones/'+p+'.npy')\n"," notPhone = notPhone.reshape(notPhone.shape[0],-1)\n"," np.random.shuffle(notPhone)\n"," X_test = np.vstack((X_test,notPhone[:int(np.ceil(y_test.shape[0]/41))]))\n"," \n"," y_test = np.hstack((y_test,np.zeros(int(np.ceil(y_test.shape[0]/41))*41)))\n","\n"," X_test2D = TSNE(n_components=2).fit_transform(X_test)\n"," X_train2D = TSNE(n_components=2).fit_transform(X_train)\n","\n"," print(X_train2D.shape)\n"," print(X_test2D.shape)\n","\n"," plt.scatter(X_train2D[:, 0], X_train2D[:, 1],c='g')\n"," plt.scatter(X_test2D[:, 0][y_test == 0], X_test2D[:, 1][y_test == 0],c='r')\n"," plt.scatter(X_test2D[:, 0][y_test == 1], X_test2D[:, 1][y_test == 1],c='b')"],"execution_count":0,"outputs":[]},{"metadata":{"id":"ARTIaX_1xSaG","colab_type":"code","outputId":"ae88e20f-5c2a-4733-fac4-d20d0f00766b","executionInfo":{"status":"ok","timestamp":1551148281620,"user_tz":-300,"elapsed":28107,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":399}},"cell_type":"code","source":["plotTrainTestScatter(phone,phones) #TSNE dependent on complete dataset?"],"execution_count":30,"outputs":[{"output_type":"stream","text":["Phone: L\n","(984, 2)\n","(232, 
2)\n"],"name":"stdout"},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAeQAAAFKCAYAAADMuCxnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3XlYlWXewPHvWYDDw6JyRJBFhMpi\n0daZckEtbUHba7IsyxrHpnemmaaZstQpZl5x0poW683JTNuzySlbhLJlMpfSqbEU0NwQAQEFVISH\n5WzvHwSCHOBwODu/z3V1XXLOs9zcHZ7fubffrbHZbDaEEEII4VVabxdACCGEEBKQhRBCCJ8gAVkI\nIYTwARKQhRBCCB8gAVkIIYTwARKQhRBCCB+g9+bNjxw54c3b99mgQQpHj6reLka/JfXvXVL/3iX1\n7119qf/o6Ai7r0sLuQ/0ep23i9CvSf17l9S/d0n9e5c76l8CshBCCOEDJCALIYQQPkACshBCCOED\nJCALIYQQPkACshBCCOEDJCALIYQQPkACshBCCOEDJCALIdxONakUHd+PapJEFkJ0xauZuoQQgc1s\nNZO9eR55+9dSVldKfHgCWSlTyR6Tg14rjx8h2pO/CCGEy6kmleLaAyzeupC1RR+0vV5Sd5Bl25cC\nsGDcIm8VTwifJAFZCOESqkmlrK6UF39Yyj93vYVq7bp7Oq8ol7kXPooSpHiwhEL4NgnIQginqSaV\nQ3VlLN+xlHUHPqG0rsSh8w7VlVKpVpA8IMXNJRTCf0hAFkL0mtlq5r6P7+PdgvcoqTvY6/PjwhOI\nUWLdUDIh/JcEZCFEl1STSqVaQYwS26F7OXvzvLaxYGdkJU+R7mohTiEBWQjRFngjgyOpba7FaBjM\n4v/kdJgdfenwy5g18h4MegMf7l3j9L2uOe16ssfkuLD0QgQGjc1ms3nr5keOnPDWrV0iOjrC738H\nfyb133cnlyV9REldCTqNDovNQpgujHpLvd1ztGixYnXqfnqNnr2zSqV17ALy+feuvtR/dHSE3del\nhSxEP3Zq17PFZgHoMhgDTgdjgOmpt0kwFqILkqlLiH7AXqas6oaqPnU991bG4JE8Nv5Jj91PCH8j\nLWQhApi9TFmXJF3KiaZavir7kqqGI24vQ6wylKyUK8kZt0iycwnRDfnrECKAndolXVJ3kFcKXnL7\nfS9NuoxF45/CZDV1mqEthLBPArIQAUo1qeTtX+vx+541KI1XslZJa1iIXpIxZCECVKVaQVldqcfv\n+5exCyUYC+EECchCBKgYJZb48ASP3lODhlHRozx6TyEChQRkIQKUEqSQlTLVo/fUarQ89d3jmK1m\nj95XiEAgAVmIAJY9JocbzrjJY/ez2Cws276U7M3zPHZPIQJFnwJyY2MjkydP5t1336W8vJwZM2Yw\nffp0fv/739Pc3OyqMgohnKTX6vn7xCWE6cM9et+8otwOa56FED3rU0BeunQpAwYMAGDJkiVMnz6d\nN998k6SkJFavXu2SAgoh+kYJUrgl9VaP3vPQiZbtFYUQjnM6IO/bt4+9e/cyceJEALZs2cKkSZMA\nuPjii/n6669dUkAhRN/9dezfmD3qHuLDE9GiJT48kVkj7+ZGd3Vna+CFH56TsWThlOqGKj4r/oRt\nlf/tVz0tTq9NWLRoEX/+859Zs6Yl9V5DQwPBwcEAGI1GjhzpOQPQoEEKer3O2SL4hK6ShAvPkPp3\n3AvXPY9qeoLyE+UMjRiKEqSgmlS2PvcNB2t7v6dxdyw2CyvylxMRFsbTVzzt0muLkwLt899obuSi\n5RexvXI7Nlr2PQrSBjH7/Nk8fcXTPrecztX179Rvt2bNGs455xwSE
xPtvu/oBlJHj/r3Nx/ZbcW7\npP6dE8kQ6o9ZqKel7q4YPtXu3sYDggZgCDJQqVY6fa+X/ruCezMeIDIk0ulrCPsC8fN/yT/Hkl+1\no8NrJquJ//vP/3Givp7fnHufz2R+85ndnr788ktKSkr48ssvqaioIDg4GEVRaGxsxGAwUFlZyZAh\nQ5wqqBDCs1r3Js4ryqXsRAkxYbFkJU9hwbjFfFnyOdPX/sLpa9eZTjB/44MsmfQPVxVXBKjqhioK\nqwq7fP/Vwpd5tfBlYgyxXDPiOrLH5Phci7mvnPptnn76ZBfUs88+S3x8PNu2beOTTz7hmmuuYd26\ndWRmZrqskEII99Fr9SwYt4i5Fz5KpVrRoQVy7pDz2/ZIdtaGsg2oJtUnWjXCdxVWF2Cl589ZZWMF\ny7YvpdnSzOIJT3mgZJ7jsnXI9957L2vWrGH69OkcO3aMa6+91lWXFkJ4gBKkkDwgpUPgNIYOJtWY\n1qfrlteVyYxr0aM0YzpaHJ9T9HLBS9Q01LixRJ6nsTk64OsG/j7+EYhjOP5E6t8zGs2NTHl3EgVV\n+W0TbXojOnQI/7ltu7SQXSwQP//2xpC7Mzwyha23fe/GEnXNHWPIkqlLCNEtg97AFzdtYvsdu8ka\nPhVF17vAekXyVAnGwiG5139OujEDDRqHjj9Ye4Dqhio3l8pzJCALIRwSExbDK1PeovCu/Q6n44wy\nGFk0/u9uLpkIFAa9gX9P20zhnfu4LOmKHo+3YqWwusADJfMMCchCiF5RghSenfQPfn/h70mMSEKL\nlpjQWKIMUeg0LWOAWo2WtKh0vr99Z8DNhBXuFxYUzn8qtvZ4nAYNacZ0D5TIM+QvRQjRa3qtnqev\neJo/jHq4w8zs6oYqCqsLSDOmYwwd7O1iCj91+eqJHG3qecLWaQNOD6jPmQRkIYTTWmdmtzKGDiYz\nYYIXSyT8XWV9JTtrul6P3N7aGz51c2k8S7qshRBC+ATVpPKbz2Y5dOyNp09jkCHKzSXyLGkhCyGE\n8Cqz1Uz25nl8tPcDDqllPR6vRcuSyZ3Tvfo7CchCCCG8KnvzPLv51O0J0YawY+aegJwsKF3WQggh\nvEY1qeTu+8ihY69Ovo6SXx9hoGGgm0vlHRKQhRBCeE2lWkFpfUmPx40YOIJ/XP6SB0rkPRKQhRBC\neE1kcM9bc+o1enJv+CIgu6nbk4AshBDCaw6rh3s85urTru0Xe2pLQBZCCOE1d6+7q8djnpu8zAMl\n8T4JyEIIIbyi9EQJu452nwSkcOb+gO+qbiUBWQghhEeZrWbmbniAMW+c3+1xyy99lcFK4KTG7En/\n+NohhBDCZ2RvnsfyHS/0eJxO17/ajP3rtxVCCOFVqknlo70fOHTs2dHnurk0vkUCshBCCI+pVCsc\nSo8JYLKa3Fwa3yIBWQghhMc4su4YYFDwIGKUWDeXxrdIQBZOqW6oYkPpeqobqrxdFCGEH6ltrnXo\nuInDJqMEKW4ujW+RSV2iVxrNjUx5dxI7qwux2CzoNDpSjWnkXv85Br3B28UTQvi4GCWWhLAESutL\nuz3ulrNu9VCJfIcEZNErU96dRH7VjrafLTYL+VU7uGL1xSy9dDmN5kYMegNJkckAFNcWARqGKEOo\nba4lRontd996hRAnKUEKAwwDewzIqcZ0D5XId0hAFg6rbqhiZ7X9RfyFNQVMeHt028969Ng0Niw2\nCwAaNNiwkRCWyJTTriR7TE6/WewvhDhJNakU1x7o8bhntz3JgnGL3F8gHyJjyP2EalIpOr4f1aR2\n+1p3CqsL2gJsT8yYOxxrwwZAaX0Jy7YvZf7GB3tReiFEoCiuPUCdqa7H49bsftfhZ1OgkCZKgDNb\nzWRvnkfe/rWU1ZUSH57A5clZAHxSlEdpXQkxoTGMSxjPY+Of7DaBe5oxHZ1G53BQ7s4rBSuxoSFn\n3CJpKQvRr9gcOupwYyWVagXJA
1LcXB7fIU/CAJe9eR7Lti9t+7mk7mCnDDkVDRWs3vNP3tv7Ljec\ncSN/Hv2/qOb6TuO9xtDBnDFoBLtqdva5XBabhZX5LxKk1fe7bikh+rOkyGTC9OHUm7tvJYdoQ2TZ\nkwgcqkklb/9ah4+32Mz8c/cqRr0yggvfOIdxb/6M+RvnYLaaqW2q5Xef/5ofa3a5tIx5Rbn9rltK\niP5MCVK4JbXnGdQ6rc4DpfEt0kIOYJVqBWV13c9ktOfU8d7NhzZy4HiRQ+M+vXWorrTfdUsJ0d/9\ndezfMFtNvFywostjmsxN/e7ZIC3kABYZHEmMEtPn6+RX7XBLMAaIC0/od91SQvR3eq2exROe5raz\n7ujymLiI/vdskIAcgMxWM/M3zuGydyZSrpZ7uzjdykqeIuuSheinFk98iozBI+2+d+qzoberQvyR\ndFn7keqGKrYd/g6jIZozo84CWrqlI4MjOyTdOHUiV3uumiXtrFglliMNR4gLTyAreQrZY3K8VhYh\nhHfptXrW3bieeRvn8HHRWg7XVxIX0fHZUNtUy/yND7Kh9CvK6w8RH55AVsrUgMxlEFi/TYAqO1HG\ntWuyKD5xoO01DVoUXSj1lvq2IJsQlshlyZfz6YFPuryWN4MxwGXDr+A3594nGbuEEEBLUH509P9y\n85m3cqiulLjwhLYGx5yv/sjbu95ENde3HV9Sd7CtwRFoKzQ0NpvNsUVhbnDkyAlv3doloqMj3Po7\ntOaNbp+q0t8lRiSx4eYtLgnG7q5/0T2pf+8KhPqvbapl7oYHyN3/IXXtlkEFEYQhOJQT3WxE4cpn\niTP6Uv/R0RF2X5cWsg8LtGAMMqtaCHEyYdEbBa9Sb6nv9L4JE6bm7vdCDsRniQRkH9Vd3mh/JrOq\nhRCPbHq4U4Ki3grEZ4nMsvZRvckb7U9kVrUQ/ZtqUlm1840+XycQnyXSQvZRrswb7U6tuzh1974G\nDfERiQEzq1otraZ6/U6ig48ysOxHGqdeDfEJaCsrsMbEghJYDwkhXKm4tqjDeLEzpiZfFRDPklNJ\nQPZRxtDBpBrT/GIMeXjEcA60mwHe3h1pd3HPufcGxKxqc10jCy/4mI9qxnGQSxlGCVdTyRMLL0SH\nBQ1gTRxGU9ZU6rNzQC9/XkJ0punj2VqemPhMwC15Aumy9mm513/e5aJ5XxEbNpTPbtrIHem/RNGH\ntb0eHhTOrJF3s3D84yQPSPH7YAyw8IKPea5mBgdIxoqeAySzhD/wAI+jpeUxoys5iLJsKWHZ87xd\nXCF8UlLk8A7Pit46beBpGEMHu7BEvkMCsg8z6A18cdMmdtyxm6kpVxMfnoBOoyMxIol0Y4a3iwfA\nFclTiQyJ5PEJT1F45z7WT/ua9dO+IX/mXhZmPh4w32LVA4f5sGac3ffe5xpUQju8FpKXC2rgZhQS\nwllKkMJNI252+vzs0YHXVd0qMJ6WAS4mLJaVV7yOalLbMnNd9s5EbxeLdGMGOe0W5itBCqnGdC+W\nyH3qH1xECc/bfa+ERMoZymnsb3tNe6i0ZUw5OXCWZAjhKgvHP863h7c6NSR3RK10Q4l8g7SQ/YgS\npJA8IIXD6mGndnFytdFxYwOmBdwd8+EanvvybDRdTF5LpIShdMwZbo1LaJngJYTopDVl5sz0u1B0\nvRvOmjDsEjeVyvskIPuR1k0jpn90I1as3i4Onxz4OKATvbdaOO7fLOU3WAiy+/41vI9CQ4fXmrKm\nyGxrIbrRuuNT4V37uSIpy6FzogxRJEQkurlk3iMB2Y+0bhpRVu/91jHAoRMtmXICmVpazYfHJth9\nT4eJ/+FZHucBrIANsCQmoc6+p2WWtRCiR0qQwnOTXyRcH97tcVEGI9/elu+hUnlH4Pc3BgjVpJK3\nf623i9FBiD4k4DLlnKr66/2UMN7uezbgfp5GOziK6g1b0NbWyjpkIZwQGRLJ9LQZdnepiw9P4L1
r\n1jJ8QLIXSuZZEpD9RKVa4RPjxv2NcXQKwyjhAJ0fBjosDKIa65BEMA7GagzMpRhCeEJroo+8olzK\nTpQQExZLVvIUFoxb3C/mqoB0WfuNGCWW+PAEbxejgwZzQ8B3WSuxA5hCrt33TBi4hM/RnqiVJU5C\n9JFeq2fBuEVsuHkLX9/6X76e/l8eG/9kh2CsmlSKju8P2LkrEpD9hBKkkJUy1S3X1ml0aNCg19qf\ntNSVhIhhAd9lHTZvDl+S2eX7P3AONaUNaCsD+4uJEJ7SupqkfTKh1gmtmW/9nAvfOIfzX81g9icz\nqW3qentGf9SngLx48WKmTZvGDTfcwLp16ygvL2fGjBlMnz6d3//+9zQ3N7uqnIKWLp3Zo+4hMSLJ\npde9Pf0uvrl1G7vuLOqUcas7gZjcvQNV5UTuJnaR1s1BWrYYr5AlTkK4UeuE1pK6gwBUN1WxZt+7\nnPFSIg999UfMVrOXS+gaTnfMf/PNN+zZs4e3336bo0ePct111zF69GimT59OVlYWTz75JKtXr2b6\n9OmuLG+/1tqlM/fCR9l3bC9Xv3cF9U4nadeQGDGsbcOH1m6hxyc8xV/G5FBcW0SjuRGD3kB8eCIL\nt2STV5RLZX1FQG0U0R1tZQXbK2Oxouv2OGNkEwQHe6hUQvQvqkkld99Hdt+zYWNF/ottz0Z/p7HZ\nbF1v1dMNi8VCU1MTiqJgsVgYM2YMYWFhfPzxxwQHB7Nt2zZWrFjBs88+2+U1jhw54XTBfUF0dIRX\nf4f5G+fYnZUIEB4UAUC9qQ5FH4bNZqXR0sjQ8HguH345s0beQ1x4fK9auK2Zwnxlowi317+qYh19\nGfHl33a5BlmLiSKSGTz7auoX+P8DoTe8/fnv7/pL/Rcd38+Fb5zT7TGxobF8c9v3Hn0u9aX+o6Mj\n7L7udAtZp9Oh/LS8Y/Xq1YwfP56NGzcS/FNLwWg0cuTIEWcvLxzQflbiobpS4sITmJx0GbNG3t02\nAaw1gLb/t7Mf2taxnX5DUYi4aiwjl23ne863e4gVPUkUc/ayHXz4u1oMQyI9XEjf42tf3IR/iwyO\n7HGb14qGCirVCr9/PvV5Lvlnn33G6tWrWbFiBZdddlnb6440vAcNUtDru+8O9HVdfdPxlBeuex7V\npFJ+opyhEUM7PQCTiLH770Dh9vr/vyV8HfQQF70QwnZzKra2fZ1aaQAdP3AOV2cWsa3Gu58HT2tf\n/2armT+t+xPv73qfg8cPkhCZwPik8fxxzB9JiEzgeONxu59R4TxvP388obbmcLfBuFVYpN7j9eHq\n+zndZQ2wYcMGnnnmGZYvX87AgQOZNGkSa9euxWAwsHXrVl5//XWWLFnS5fn+3t3SX7qMfJVH67+6\nivq0i7nAtoXqLr7YaDFR8N8ajAn9I+CcWv/dDaG0ijHEcM2I6zvMWxDO6S/PH9WkMvbN8ymrL+v2\nuJ/FXMTaG9Z5qFTu6bJ2epb1iRMnWLx4MS+88AIDBw4EYMyYMXzyyScArFu3jszMrpeLCOFPtAeL\nMdt0VBPd5TFW9Oz69JAHS+U7HM0kV9lYybLtS3noqz96oFQiEChBCpkJ9tPXtvefym/8fhmU0wE5\nNzeXo0ePct999zFjxgxmzJjBr3/9a9asWcP06dM5duwY1157rSvLKoTXaKurGcAxtFi6PgYzGQP6\nZza13maSe7VwZcAmdxCut2DcYhRdz8sxH1j/Ow+Uxn2c7jOaNm0a06ZN6/T6ypUr+1QgIXyR+dzz\nOM4grN18hz2T3QyYkNH2s6pCZaWGmBhbwKe3jlFiiQuPp7SuxOFzth76holJgbuVnnCdyJBIbku/\nvcchkQ2lG1BNqt/OU5BMXUI4wjiY6NRBJHHQ7tt6Gvn3mbPBOBizGebMCWb0aIXRo8PIzFSYPz8Y\nc2DkLrBLCVIYG9+7IarH/7MwYBI6CPfLHpPDzWfe2u0x1Y1
Vfp3OVwKyEA5q/iSXq6I22n3v11Hv\n0PjBGnbs0JCZGcrKlSGUl+uwWjWUlOhYtiyE7OzATh6SM25xj1votfefw1t56Kv73VgiEUj0Wj1L\nJi3lzIGpXR7j7+l8JSAL4SiDgbn51zJ7xnGGDVHRaW0MG6Iy69bj1F55E2nnGpk0KYx9++yPBOXl\n6QN6D4rWLfR649XCl5m4agyN5kY3lUoEmk9vWk+UwWj3PX9P5ysBWYhe0Othwd+1fLXVwuav6/li\ns4Vvfgjj1VdDUNVT1yh3dKhMS2Vxk+cK6wXZY3KYmX5Xr84prMlnyruT3FQiEWgMegP5M/dwe9pM\nokOHoEFDfHgiN55xE1ckTeWz4k+obqjyy52hZCGgEE5QFEhOtjFnTjD5+Y79GQ21lpA6fTK6qZOp\nz85pie4BRq/VE6wL6fV5BVX5VNZXEhMWeMlrhOvptXqemLiER0Yv4OENf+KjvR+wes8/Wb3nnyeP\nQY8ZM7FKLGPjJ7Bo/N+JDPHtTHrSQhbCSaoKH3/seFC9nFwiyvagLFtKWPY8N5bMexxdj3wqGzbu\n//K3fteiEd712Nb/5Z3dq2iwdv7MmGmZMFihVvCvPW9z1srhzN3wgE9PJJSALISTKis1VFY6/idk\nwtD275C8XAJxQLm365Hb+7T4Ey584xzGvnk+8zfOcfjB6Y9dk6LvVJPKWztfd/h4s9XM8h0v8Mim\nh91Yqr6RgCyEk2JibAwZYnX4+PVMQCUUAO2hUrSV/rs8oyut65H7oqy+jGXbl3b74FRNKt+Wb+X2\ntdO46PVzuOiNcxn75gW9CuTCvxXXFlFvru/1eW/tfN1nv7xJQBbCSYoC48Z1nbnrVKUkUM5QAKxx\nCVhj/Hd5RlecWY/clVW73uz04DRbzfzpy99z+vIEprw3mY+L86hoqMCGjbL60h4DuQgczs7MrzfX\nU1x7wLWFcREJyEL0wWOPNaHXO9ZKTqSEoZQD0JQ1hUBN35UzbjFh+p7THPakznSC4tqitp/NVjOX\nrZ7Aq4UrMdu6bgW/nP+S3+c0Fj0z6A09H9SFRnODC0viOhKQheiDyEiYOdPk0LFX8wEhidGos+9p\nmWUdoCJDIrnqtGtcdLWTy8jmb3yQ/KodPZ5htpkl4Ug/kBSZ7FB+a3sM+lAXl8Y1JCAL0Ud//Wsz\ns2Y1ERZmBWyd/kuMbWL2jOPM3XQxNRu2Ur9gUUAueWpvwbjFKPq+9QCEB0WQFDkcaBkzzt3n+Ozt\n9aXrfXacULiGEqQwPe22Xp8Xrg9v+1z5msB+KgjhAXo9LFzYzPz5zRQXazh2DOrrNZx1lhWTqXVz\nCS2QYvf8TptQqCrayoqWMWY/7daODInkhhHTeK3Q+c1mbj5relvWpUq1goqGcofPPdJQSaVaQfIA\n+3UuAsNfx/4Nq83KivwXHT5nSsqVPpvNS1rIQriIokBqqo3Ro21MnmwlIaEleUiHmKqqaIv2g6pi\nNsNDD7VsQnHhhWFceKHCvaML0I+5mKjR5xGV+XPC5s/BX3el+PXZv3H63GlnTuevY//W9nNkcCTa\nXjyujIZov85pLByj1+p5bPzf2XXnAVIGnN7j8eFBESzMfMIDJXOOBGQhPMFsJmz+HKIyf07U6POI\nHHMhV6RVsWJFyyYUoKGyUsfb+y5iyKFt/I/1GWwlZS1JROY/6O3SOyU+PIFYZWivz4sKGcyi8U+i\n157swKttrsWK40vMpqZM9dlWkHC9qNAovrn1v+y4Yw9Thl+JTqOze9z01Nt8OluXBGQhPCAsex7K\nsqXoSg6isVq5/9Af2X4s2e6xZkL4B7/lZ2zBjI7QlS8R9uAf/K6lrAQpZKVc2evzrjytc5dijBJL\nYniiQ+cPConisfFP9vq+wv/FhMXw8pQ3+fGuYm484yZilVi0aEmMSGL2qHvIHuPbkyllDFkId1NV\nQvJOTkhSCWUN1/Z42ve
cz+95iv+z/Q7l5ZcgOLhlQpgfyRm3iI1l69lzdLdDx0cZ7AfT1uDe0wb1\ng4IH8cMduzq0rkX/ExkSyfOXLkc1qVSqFcQosX7RYyItZCHcTFtZgbbsZDrJcoZyCMeyWa3h2rbs\nXiG5H/lduk29Vk/e9V/0uE+yFi1pURl8f3vXwTR7TA6zR91jt6WsRcv0s26n4K59fVqfKgKLEqSQ\nPCDFL4IxSAtZCLezxsRijU9AV3IQgAEcQ4sFqwPfhw/9lN3rNPajPVTWMvs62b9mDrfuk2yvdXvL\nWbdx44hppBnTMYYO7vY6eq2eBeMWMffCR6lUKwjSBrGruhBj6GDOjEr1m4euEF2RgCyEuykKTVlT\nUZa1BKTjDHQoGLcKomUPZeuQGL9Nt9k6dpdXlMuhulLiwhPISp5C9picXncvt7Z6ABIiHBtXFsIf\nSEAWwgPqs3PAbCJ0xXKGUk4SBynG/qSuU+3lDAZTw/6xdxKBgj+2A09t3frLmJ4QniRjyEJ4gl5P\n/WNP0nDHL1Fo4FrWOHiimbe5iTT9bs5+bwGZmQrz5wf724TrNv42pieEJ0kLWQgPqv/b4xASzKKP\nlmA7BO9zDcUMp6vvxoM1x1hmu4ef9lqnpETHsmUtaywXLGg+eWAAZPcSor+TFrIQnqTXU79gEbWb\nv+GRTeP59rYnKAy9gLtYhp5m2ufATj1NxTB0kN3L5Obq2blTg1prJuyh+4kafS5RF53r99m9hPAE\n1aSys7qQndUFqCYV1aRSdHy/1/Ofa2w2m81bNz9y5IS3bu0S0dERfv87+LOAqX9VRVt8ALVRw87G\nFKrrQzn3XCu1tRpGjw7DatXYOanlzzZOV8FUyxr+yFMkUopCy7Zy6ux73L5mOWDq309J/fee2Wrm\nkU0Ps2rXG9SZ6gDQoSNYF0KDRSXaMITzY87n8YlLiAmL6fZafan/6OgIu69LQO4D+YPwrkCvf1WF\nzEyFkhL7aQA7shJPCT9nK8/zG6ITFWo2bHFr93Wg17+vk/rvvfkb5/SYXKbV6QPPYO31nzLIEGX3\nfXcEZOmyFsJHKQpkZTna9ayljCTe4xfEUc4FJf+ieV9pz6cJ0U+oJpXc/R85fPzeY3tIXZHCnXm3\nUllf6caSnSQBWQgflp3dzOzZTSQmWtBqW8eXu2dDx/ecz5Qbu0+0IUR/UqlWcKiurFfnWLGytuhD\nRr0ygkv+OZZGc6ObStdCArIQPkyvb5lNvWGDynXXNQP2xpPtKzgaR3Wpf6XaFMJdYpRY4sIdS1l7\nKhs28qt2MOXdSS4uVUcSkIXwE1u3ODKWfJIFPbvWV7upNEL4FyVIYYoTu4+1t7O6kOqGKheVqDMJ\nyEL4gcpKDWWHeheQdZhJS5EWshCtssfkMGvk3WidDH0Wm4XC6gIXl+okCchC+IGYGBvxcZZenTNS\nU8CgsyXXsxCt9Fo9CzMf5+NpZjXiAAAgAElEQVQb/+3U+TqNjjRjuotLdZIEZCH8gKJA1hRHA7KV\nc/iOz+9YLlm7hLBjxKAzGaoM7fV5qca0Hncl6wsJyELY4SuZe9rLzm5m9qwGkoJK0WA/OEdSQ3HI\nCDbNfgnLwv/t8lqqCju3NfHjZ2Wo1b7zOwrhCUqQQlbKVIeP12l0ZAweSe71n7uxVJIYpE9kYb53\nuaP+a5tqmbfxQTaVbaCsrpRYJZaslKksGLe419sEuotaa+bog0+wLO808homcpBEhnCEK0M/48mr\nPqVx4WMQGWn3XLMZHpmvZ9WrUGc2AGCgjosT97L4w+HExDn+O8rn37uk/vvGbDVz2eoJ5Fft6Pa4\nlZe/zkVxYzq1jCVTl4+RPwjvcmX9m61msjfP483C16gz13V6P2PwSNbduJ5mS7PvbB+oqjQWV1Le\nOIihhqMYkmJ67KKeOzeY5ctDunjXRnq6hby8BgyGnm8vn3/vkvrvO7PVzLyNc3itYCVmW
+ckPOnG\nDP49bbPdcyUg+xj5g/CuvtS/alI7BFZHUuqlRWVQ23ycQ3VlxIXHMyXlSrLH5PhMy7knqgoZ6Qp1\n9d3P1s7IMPPFFw09Xk8+/94l9e86RxtruPK9y9h7dA82bGjRkjY4ndzrP8egt//t1B0B2T+eJEL0\nQfvgG6wLJnvzPPL2r6WsrpT48AQmJU1m9e5/9nidwpr8tn+X1pWwbPtSmi1NzPn5fAqrC0gzprt1\nwkdfFRdrqKvvedpIYaGO6mowGj1QKCF8wCBDFJtu+Zbqhiqv/i1LC7kP5Buqd/VU/63d0O2D7wDD\ngB7HjJyl0+hINaZ1+63am3Zua2LC5UZ6zvZl41//UsnMtHZ7lHz+vUvq37tkcwkheiF78zyWbV9K\nSd1BrFgpqTvotmAMLUkD8qt2cPnqiW67R18kGw4RTm2Px2m1kJbWEoxVFYqKNKitE7FVFW3Rfk6+\nIIRwFQnIIuC0bD5e0KudXVxpZ00hf/zyd5itju7U5BmGpBhmhq3u8bi0NAsDBsD8+cFkZiqMHh1G\nZqZC9sRviLzoAqIuPIeoi86B226D2p4DvBDCMRKQRcAwW83M3ziHzLd+zoS3R1NaV+K1srxW+DLZ\nm+d57f52KQo5t2zjXp4mnOO07BzV/j8r6elmcnMbyM4OZtmyEEpKdFitGkpKdDxfeCnTKp5hI2PY\nWRGF+sa7DD4rmbC5D7SspxJC9ImMIfeBjOF416n135vNxz0hMSKJDTdv8f7yqPbMZsIeeRjrW+9R\nVD+YBgw0hAyiZsLVpP59BsYYPaoKmZkKJSX2ZmOffFyEUscdvMKz3Efz7NnUL1jkud9DyPPHy2TZ\nk4+RPwjval//qkkl862fU1J30MulOkmn0bF5+nckD0jxdlE6U1W0xQcAG9ak5A7rl4uKNIweHYbV\n6thWj2fzX7bE30Dtpm8kVacHyfPHu2RSlxBdqFQrKKsr9XYxOogLTyBGifV2MexTFKypaVhT0zsF\n0d5uZPED53F/2Z/QVla4upRCeIVqUvm6bBMr85fzddkmj6XQlXXIIiDEKLHEhyf4VAs5K3mKb3VX\nO0hRIGvsUZa9He3wOWu01/NgpBb/+22FOMlsNTN3wwO8XPBSh9c1aLgj/ZcszHRvCl1pIYuAoAQp\nXJJ0qbeLAbSMHc8edQ/ZY3K8XRSnZedY+Z3yIokcoP24cVcqrEOorA3r9hhVhaKdTTTuLJJlU8In\nZW+e1ykYA9iw8XLBcrdP1JSALPxe6+zqf/34ttfKoEFDalQaX928hQ03b2HBuEV+k1LTHn2kwt9u\n3sou0jiTwh6Pj0to6eq2x2yGhx7UMzrNxEUTBjJ+QgSPpuVy6I5s1EoZAxW+QTWpfLDnvW6P+XDf\n+27tvvbfJ4YQP2lNAOItlydN4elLnvPptJnOqF+wmIFbt/B9/nkkcpAqYro8dsoUc8ehaFVFW1yE\n2azhst+cQ/6uECAUgAMks1RNZmmeheF5JUzJ2Mr83PPQG+RxJDyruqGKbYe/w2iIBmxUNHQ/D6Ki\nvpxKtcJtEzVd/hewcOFCfvjhBzQaDXPnzmXUqFGuvoUQbVSTSt7+tV67/+1pM3ls/JN+3Rrukl7P\nsXXrCZs3h9K8n/Hbivm8wW00/BRYQUNYmJVbbjGRnd3c8tJPy6oMq95AU1fHb1lCPhd2cQMdBxjO\n8/nD4fJPyV5/kQd+KSGg0dzI5asnsrOm596f9gYbot06UdOlT5GtW7dSXFzM22+/zb59+5g7dy5v\nv+29bkQR+Lw5uzotKoMnJi7xyr09Rq+nftHf4dH/5bHKCuZHnqD4sJXGRoiLCycior5Dyzgsex7K\n8hcAUAnlfa516Da5O0/jwWoVxSjTwoR7ma1mRr18Jseaj/b63PGJF7t1oqZLx5C//vprJk+eDMBp\np53G8ePHqavrvLesEK7SOrvanbRoGTHoTHSalkQZO
o2OjMEj+fjGL9x6X5+iKFiTU1CMCqmpNs49\n10ZGBp26qUPWftj2YzlDKWeoQ5cvIZEj63dJrmzhVo3mRtJWpjgVjPVaPYvG/90NpWp3D1derKqq\nivT09Lafo6KiOHLkCOHh4XaPHzRIQa/vfm9WX9fVAm/hGUlxMVyffh3PbHnGbfcYFTuKbXdvo0qt\nYnvldkbFjGKwEljjxc7q8PnfdxgOlbX9OJRyhlHCAZJ7vE4iJaStWozyWCEcPAjDhsE118ATT4D+\np8eUqkJ5OQwdKglIfiLPH8eZrWZiHh/OsaZjTp1/zwX3cFpCfIfXXF3/bh346ikJ2NGj/v0tWDLl\neFdr/c8591EaGprJK8rlUF0psWFxZMZnYtCF8nJh5yUMjmrdTvGDq9f99P85hJHhP8NWD0fq5f97\np8+/PpyouHh0ZS1DCAoNXMManuEPPV7rat5H+TL35AsHDsAzz6A2NFOfnUNY9jxC8taiLSvFOjSO\n5szx1C1YDJGRLv6t/Ic8fxxntpqZ+PZoahpren1ufHgCU1Ou4uHz/tKhvt2RqculAXnIkCFUVVW1\n/Xz48GGiox1PLiCEM/RaPQvGLWLuhY9SqVYQo8SiBCmYrWY0Wi2vFqzAYrOfeSo1Ko2x8Zl8cuBj\nDtWVEheewMWJF3NlyrWMjB4VcDOn3UpRaJp6FcqykzPen+ABAN7nGkpIJJYKjFRxnAGUkkgiJVzD\n+zzOg50upxLKng/20HzkadLfe5kmoJzhDC0rR1n1JiEffUDj9BnUZ+ecbEULcQqz1cykf45j99Ef\ne32uBg1vTn2HVGN6zwe7gEs/xWPHjuXZZ5/l5ptvpqCggCFDhnTZXS2EqylBSoflCK1jPhpsrMhf\n3un4tKgMPrvpK/RaPfMv+kuHYC6cU5+dA1YrhlVvoqk7gR4LT3M/C5lHOUMZSjkKDdQT2vazYUAI\n2hNWaNmCGTM67ucJXuZOTlREwnugZy4hNNNAKMMo4RrW8ETdA23BXza2EF2Zt3FOr2dTt0qIGEZS\nZM9DLq7i0oB83nnnkZ6ezs0334xGo+HRRx915eWFcMqCcYvRa4PIK8ql7EQJMWGxZCVP+en1lj+B\nU4O5cJJeT/3Cx6mf/xe0+/aiPPsUIR+9T6i5gRT2g0aDKS0D9cVXiN72HerosahRRqIyf4aupGW7\nzD/xOM9yX4fLmgnBTAjQso75Gf6ACT338zRD1n4Bf6hCW1uLNSZWxpdFm5ZlkR/2fGAXPJ3+VnZ7\n6gMZw/Gu3ta/alKlFexCDte/qqL9cRfa6iOYzz0fjJ2HAQZOuIignYWohHIWhZQwvMfL6jBhRUMS\nJVytfMYTDb9BmzCUpksvo2HWPVjj4gM6OMvzp2dFx/cz+o3zsLZ2vzhIi5aZGXd1+NJ+Kp8fQxbC\nl0kr2EsUBeu553X9SFRVdAeKgJalUqU4tozNQhDQ0mJeov4KDXU8XXI/yorlhK5YjjU2lqasq6jP\nWSRjzP1UjBJLXHg8pXUlvTrv1rQ7eGz8k24qVdckl7UQwqu0xUVoGhqAlqVSiTiX6OV9rkH9KYuY\nBtBVVKCsfJGBl45vSagt+h0lSGFKypW9Oidj8Ei3rzfuigRkIYR3NTa2/VOhgevoPsF/V0pIbEtE\ncpB4Xmc6B4knqCCfsPmdZ3GL/iF7TA6zRt5NeFD3a4aHhsVxZ8avWHfjeq+lwpV+HCGEdxkMHX58\nggewomEFd1FPBC3t3Z4lUkIEx4imgiqGtL0+mMPs/XA0PKIG9JiysE+v1bMw83HmX/QX9h3by9If\nlrC5bBMV9eUMUYZw6fAs7jn7t8SFx3t9bolM6uoDmVThXVL/3uWy+ldVjBmnoz0lzW4VUYzkByoc\nHFP+PU/xBtPt7ko1mEp2banAmpyCqkJlpYaYyHrCayv8dma2fP5bJmoW1xYBGpIihzscUF0xwVMm\ndXmRzNAVwk0Uh
cabb23blKLVYGqYxjt2M32NijrI0bB4Dh3SkhBazTW2Ndxb/zTPnLJcqlUVQ9jf\nBMvnaMnL1VFWGcQQbR0/t+7n+dg7GXT1RZJgxI+YrWYeXH8fq3/8J43WliGP8KBwrky5hjFx4zg/\n9mfotfq253VL4D4A2EiKTPbZCZ79uoVceqKErw9tYnTcWBIiEtuCbmRwJIfVSkBDfHgCi/+TQ97+\ntZTVlRIXHs/Y+Exyxi3mtIR4r/8O/Zm0ELzLpfVvNrekx1z7EdqyEmwaDRqbDTM6/sTjfPBTpq9E\nTRlT0vYxP+88mq36lpZujA2loYqPR/2V200rsN/FbWPy0B18Vm5vO1grZ7ONL+56BctjOa75fTyg\nv37+axpquOiNczjW3HNO6ujgIQxUBlJed4g6c0sPTJg+nFtSb+WvY//Wp7Fid7SQ+2VArmuu44LX\nMzrkNQ3RGTCGGDmklnU4Vq8Nwmw1dbpGuD6cX57/S+ac+yjNlmZpPXtBf30g+Qq31L+qoq38qRu5\npprgrzfRfM55NNZZqazWE31unN0tGrVF+6m7cCrDKaGrgBxPCWUM6/LWo3QFPJvb0t2dbDiEISmm\npSu7fZl8qGu7v33+zVYz2ZvnsWLHi5htfZ81P2vk3SzMfNzp8yUgu4DZaiZ52VCarE0uuV7G4JEc\nbzxOWV0p8eEJZKVMJXtMTo/fvKQLvO/62wPJ1/hU/asqUWMvILbsW7tjyIOo4jgDsXY7SnfyURhO\nLdeGrmPhsP+j7KhCzWEb5w89RMRVY32ma9un6t/NSk+UcP+/7+XLUtdtearT6PnxrgNEhji3QYkE\n5D5qNDcy6uURDnV19MXsUfewYFzn3LqqSWX/sb08//0Svj60mfL6Q70K4qKj/vRA8kW+Vv9h8+dg\nXfYyyRR1mmWdTxrn8z1lJPbyqlZaW9warIziB76462Wf6Nr2tfp3h5bezJHUNFa75fo3jriJ5yd3\nznPvCHcE5H61DvmKf13s9mAM8ObO16htqm372Ww1M3fDA2SsPJ1L3hnH6j3/pKy+FCtWSuoOsmz7\nUrI3z+twjeqGKjaUrqe6oerUywsh7KjPzkE763Yqw8/gAIm8ym0UhaZSPHMeg+MVrmGNE1fV0hKQ\nNdjQ8QPnMenV2ajVKkVFGlT/3kHW5fYc3c3z25aw5+hul1zPncEYYGPpBlST7/xP7Dct5OqGKtJX\nnt7rnKbOOm3g6byW9TYGvYE/b3qYtfs/6Pb4xIgkNty8Ba1Gy5R3J7GzuhCLzdK2J2/u9Z9j0Bu6\nvUZ/0x9aCL7MZ+tfVdEWtyyFsSYNB0UhbP4cgpct42ds4XvO7+MNLBgHmKk+HkxibBNZWRayc6we\n78X2pfo/1niMka+MoMlyMslLiM7Ajjt2M9Aw0Klrlp4o4bzX3LvtoQ4dm2/9zqkZ19JC7oPC6gKP\nBWOAfcf2Muat8znvtfQegzFA2YkSKtUKprw7ifyqHW3791psFvKrdjDl3UnuLrIQgUFRsKamY01N\na5uEVZ+dQ/OsWWwJm8xsnkdPEy1jxq3/9YaO6uMhgIaSCgPLVoax4NKt/To956nBGKDJ0sjIV0Y4\nfc2vD23qa7F6FBeRQIwS6/b7OKrfBOQ0Yzo6ja7L97VoiVfiPViijpSgMIK0Qeystr9v587qQum+\nFsJZP20LebxgFwvXn8+utVt55c18Vr56lOm39b3LMrcgGc387JYfVBXttu/Qf/YJVAfu32zrsNqG\nkq86BeNWTZZGp7uvR8eN7UvxHOLp7RV70m9mERlDB5NqTCO/aken94I1wez9VSlWm5VKtYIgbRBT\n/jWJCrXCo2Xc9VM3tT0Wm4XC6gIyEyZ4tExCBBKzIZh5R14id9+HHG6oRNGH0XiaCX6+CL6fCc2R\nOJqqs70DJJL7LwsXl/4PCV+s6tBaNqdncCzvi04pQv1Vo7mRKe9OorDKsV7HTw9
8zBmDet9STohI\nJMpg7HEM+YYzpvE/59zLivxlvLt7NQ2Wli9Yij6MxIhhHG86RqVagaIPA2w0mhuJi0ggK3kK2WO8\nPzmvvX4zhgwtH6RzXk21+z+4/cxoZ/fQ7AudRkfu9Z8x5d3JdoOyTqMjf+YejKGd95Ltr3xpDK0/\n8qf6b13h8Kt1M9l3fK/9g5pDofp0NBv+yKDd46kxD6N1QtdJXQXrlseoDjPp5PMm00mmGIWWXaxM\nGSM59oVru2A9Wf/tl2le+d6ldhs2Xdl0y7dOBWSwP8tagxawkRAxrC2otq5QsZdKs33ZAZctN5XU\nmX1ktVkJ04dRQ+eAnFeUy9wLH0UJUjAaBhOqC6XeUu+xssWFJ3BmVGqXrfhUY5oEYyF6yWw188im\nh1m18422TE1dCm6AoTuw3TSTY42hzM0bynVFxzDVDmdf1M+5o+Y5rHQ17NUSqC0EsZ1zyaCA4RRz\nDWt4ggfQFxa0dF8b/etvuDUZR2umwqgQI1VNRxw+P0RncDoYA4QHh7PrrqIOWRWjDMYug6oSpJBq\nTO/0WvtJW76YMrNVvwrIlWoFZXX291otOVFMWV0pyQNSuPb9LI8GYzg5lpF7/eddzrIWQvSsfYvo\nz5se4rXCl3t9DauhgYXX7eepZhhaV0OTsYi0d54lP9/RK2g5QDLP8AeqMfKgdRFR23YSPDmz12Xx\npuzN81i2fWnbz70NxjvucM3yp4SIRH5x5s1tP/tyUO2LfhWQFX0YBr0B1Wx/EsfyHS3J7XvTHeMK\naVEZbWMZBr2BL27aRHVDFYXVBaQZ06VlLIQDTm3NabVazNa+zXxuCIb9UYDtKO+8vZvZN53BzkIt\nFpsWLZafWszdjzm/zgxeZwbhtzdw43QNs39tIS7OZj8Lpw+l6VRNKnn71zp1rjHEyHe3F/jUhCl/\n0C8Ccusf6ps7X+syGAOsO/AxFjt5q93thOkEzZbmDpm6jKGDZQKXEL0wf+ODrMg/mXXJanXtHJBt\nNZv44t+JVFdD4TYTcZZixt1xNhZrT5PAWt6vMyu8/Cq8/KqN6GgrV1xh5tFHm6mu1hBjNBGdM4eQ\nj9e2BOT4BJqypno1TWd3PYo9qWk6SqVaEbAtWXfpFwF53sY5rMx/scfjyuvKsPV6TWLfHaorlQ+v\nEE4yW808vOEBXil4ya33aV2GYzRC5uQg4HRS06zk5/d29aiGI0d0vPaajtdfD8Jm0xCnP8y15pE8\nwwo0WNGVHERZ1tJVXL+gcxped6usr2D+hjlOnx8fHu9T63v9RcCtQ1ZNKkXH91PdUMXeo3u474v/\n4ZV8x/5Qh4bHd7tW2V3iwn1rcboQ/iR78zy3B+Mog5GEiM55sHNzG8hIN6HTWOh9ghGw2VpScx4y\nx/A895LODr5nJCqhAITk5eLJ/JyN5kYufnsMI18ZwacHP3F6pcmUlCulu9oJAdNCbu2Wzt33EaX1\nJWjQYuvlh+mSYZOcmgDSV762OF0If6GaVNbu+9Ct9xgUEsW3t9mfV2IwwBf/bqS6GrZ/ayJ35VG+\n/EJHMUk4s555N6mcyw+EUscdvMqSsvtburCTU1BVqCxuYig/bQ2J/aUzQIc5KA3mhg77vndnyruT\nKKh2eOaaXbel3u5z63v9RcAE5FNnA/YmGIcHRTA99TYmD7vc4wF5avJV8uEVwkmVagWH6st6PtAB\niRFJZCVPYdbIe/ikKBewMfW0q3sMYtDSjX3x5UFcnBmOYex47i2bwyvc5WRJNDQQwT/4DRuYwNLj\nsbz8oJ7PVtdTWjeQYZzg6rAP+fv1m+HR/4XIk9sHtibt2NlFkqFBIYP4bkYB4cHhnd6rbqiisMp+\npkBHnT7gDJ68+Lk+XaM/C4iAXNNQw0vbl/X6vDB9OFeddjULxi0mMiSS6oYqtOiwYj9blqtp0fLE\nxGdk20XhcfYSKPTlWpVqBZHBkeyu+ZFdR3d
y1qBURkSdSW1zrVv3/I5RYokLi6esvneTjxStQpOt\nibjwBCYnXcaskXcTH57QVs67z/kf5wqkKOimTmL5stlEcoJXmEktrQGz9y3mAmsG4y+z/XRuSzf2\nAZJZUj8LzWsnePK9s2icPoP6e+9Hv3sXt+x5kPzGnV1e72jTUU5fnsiB2eWdNqtpyffv/LNPr9Gz\n9oZPnT5fBEimrnFvXsDuY71b73bt6TfwxIRnOm1O7cy1nJUWlcGXN2/2yL0CkT9livIVrYky3ip8\nvW2tfag+lGkjprNw/OO9+nI4yBjKb97/HXn7P6KkrqTL4xLDE8lKudJte37P3zinQ+9YT6IMRrbe\n+gPVjVXu+bJgNhOWPY+QvFwaS6tYbxvHVD7C1e2f4eyngAwUGrBpNKg2A6UM5ejgcibc3UBzUNfn\njhh0Jhtv+U+H11p2xDvD6aB8Z8avWDT+706d64/ckanL7wOys9sqrp/2daeMLtDS2s545fQ+r1/s\nSZTByPe375QtFftAAnLvzd3wQNt6+1OlRWXw2U1f2Q2a7ZNtKEEK1Q1VPLTpD7y/+32H790+Pa0r\ntWXj2vUmdaauPw9atJwVlcrHN/7bM393qor2x50YrriSDNsODpDs0svrMPEjZ5FEMX/icd7nWg6S\nyDBKmKJZw4sPPYApxH5w1aCh8M59nXIcXPLPsV3mYQjRGbrcRCLdmMGnv7D/2QlUEpDt2FC6nhs+\nuKrX562f9g2pxjS77/X0jVuHDoudb5EGbSiN1oZu7xuiNfCLM29i8YSn+9WH1x0kIPeOalJJX3ka\n9eaus9DdeMZNPDFxSVur8dRkG3FhcagWlZrGml7fv3XPb3d1X7fvho8IjmBXdSHG0MFEK0MoOr7f\na0l2Bl4ylgfy7+QZ/tDpPT2NmGnZyrG3WlvIc8mxe+3phqd486H7uzz/X1d/2CnXQaO5kax/XUJh\ndUHbElCdRs+tqTNYMG4Rj2x8mHd2r2r7DCk6hZvOupmFmU/0u+eZBGQ7nGkhhwdFkD9zT5cPhrYZ\n2/vXUlp3EJ1Gh8VmYWjYULKSp6LV6Oy2MmaNvButRsuH+z6gov4QQ0KHcFnyFGam/xKz1YxBH0JS\nZLLMqHYRCci9s7O6gAlvj+7xuPjweKamXE32mJxOkyX7oi+bwfu1xkbCsy7joYKZfMBVlJBIIiVc\nw/v8lUfYTzIX82+OYezVZX/PUyxkHmkUUGyn9Z3Efmrvy+DowM6NBC1aCu7c2+UXlOqGKrYd/g6j\nIZozo87q8Mxq+eJzALD16+eZBOQudNfNYs+skXezMPPxHo9rP1ml/eSU1oC97mAeJcdLiAtP6LDr\nyKnde8I9JCD3zrbK77j8Xxc7fPydGb/i0wMfU9rN+HBvDAkdwtbbtvffv4nqKpq37eToO1+S8t5z\nbTtBATQSzM/ZQj4jsbWlh7DXarb8tGnF+zzBAxSTxAh+xGpnfFqHiadSzuIPt+7Hckp6hbOiUvnq\n5i2u+936IQnIXWjtZjl1/ZxBG0pMWCx1phMcbawhLjyeKS6cXBI2UEd+8R4JvF4iAbl3HG0htzLo\nQmm0dD8E0xsz0mby94lLXHY9v2U2E/bIwxhWvYmm7qfPr16Pxmymiij+w88YSA2vM4Ncpra1qKew\nlt/xLPGGGkKjDOgqK6hDYaTlB7vj07GUs4Wf8a8Ly7g/6+TrIdoQ9swqkfkrfSQBuQfVDVVsq/yO\nsOBwBoYMbOtOcVeLVQKCd0n9945qUkldkdK2gbsnRRmM5M/c0+/GGbulqmiLW8a8rfEJDLw2i6D8\njj19KqGUM5ShlLe1qNXZ91A/91GizXUcMetZkPYBS6z32rmB7af9mbdz8L4xHBvYDHQ/f0Y4TgKy\nj5GA4F1S/71360fT+PRgnkfvmRqVxic3fiktsp6YzYTNf5CQvFy0FeXYtDo0lnarPYKCaJhxJ/UL\nHgO9vu3
zb65rZOE5H/JR7cSfWsqdu7oz+I787AuIVYbyza3bpEfPBSQg+xgJCN4l9d97lfWVjHzl\nDLfeY8TAs3h20lKqG6s5d8h5sn1ob7XfgrFBRb/tO6zGwVjPTO2wJWP7z39xRQGWC29kTEMRVjov\nQNZhInp2LOeMvohXp67y2K8SyNwRkKX/SIh+JCYshnRjRp/zFbenQYMNG1qNljRjOrnXfy6t4b5Q\nFKzJKW3/Nk++vMdToo3JvDsoDWuD/Ue6BT2jto/ijl86m85TeIIEZCH6mbwbvui01tQZwwYM44qk\nqdx77v3sPrrLa+t8BShBCrXTBqF7yozFbgvZTH3Cdmxar3WICgdIQBainzHoDfx72uaf1pr+lzB9\nODd/eD0NVscne9044iZeuXEl9cdaEuTEhMW4q7jCQfdc8zvWPLWd7zm/03sj2c6x6BrOHdL5PeE7\nAm4/ZCGEY4yhg5mcdBmj48dwa/oMh8/LGDySJZf8QyYG+RjN8NPZZJjMOXyHDhMts6xNnMN35GnH\nYDg9TXowfJwEZCEEfx37N+7K+BV6TcdOM027GbuKPoyZ6Xex7sb1snzJFykK3DaNbVxABbF8ziVU\nEMs2LuCzceG8d8uX3i6h6IHMsu4DmeXrXVL/rqeaVH6s2dk2QzpUr3S5TaPUv3fZrf/WnabWfoS2\nvIz6IUaapkzFuuBJ0EEHALMAABRWSURBVMuXKFeSZU8+Rh5I3iX1711S/97Vbf23XzqlyNCCO8iy\nJyGEED1rv3RK+A0ZQxZCCCF8gARkIYQQwgdIQBZCCCF8gARkIYQQwgdIQBZCCCF8gARkIYQQwgc4\ntezJbDYzb948Dh48iMVi4cEHH+SCCy5g165dZGdnA3DmmWfyl7/8xZVlFUIIIQKWUy3k999/n9DQ\nUN566y1ycnJ47LHHAMjJyWHu3LmsWrWKuro61q9f79LCCiGEEIHKqYB89dVX8/DDDwMQFRXFsWPH\naG5upqysjFGjRgFw8cUX8/XXX7uupEIIIUQAc6rLOijo5H6br7zyCldeeSVHjx4lMjKy7XWj0ciR\nI0f6XkIhhBCiH+gxIL/zzju88847HV679957yczM5I033qCgoIB//OMf1NTUdDjGkRTZgwYp6PW6\nXhbZt3SVk1R4htS/d0n9e5fUv3e5uv57DMi/+MUv+MUvftHp9XfeeYcvvviC559/nqCgoLau61aV\nlZUMGTKk22sfPer4hui+SJLre5fUv3dJ/XuX1L93uWNzCafGkEtKSli1ahXPPfccISEhQEs3dkpK\nCt9++y0A69atIzMz06nCCiGEEP2NU2PI77zzDseOHWP27Nltr7300kvMnTuXRx55BKvVytlnn82Y\nMWNcVlAhhBAikMl+yH0gXUbeJfXvXVL/3iX1710+02UthBBCCNeSgCyEEEL4AAnIQgghhA+QgCyE\nEEL4AAnIQgghhA+QgCyEEEL4AAnIQgghhA+QgCyEEEL4AAnIQgghhA+QgCyEEEL4AAnIQgghhA+Q\ngCyEEEL4AAnIQgghhA+QgCyEEEL4AAnIQgghhA+QgCyEEEL4AAnIQgghhA+QgCyEEEL4AAnIQggh\nhA+QgCyEEEL4AAnIQgghhA+QgCwClmpSKTq+H9WkersoQgjRI723CyCEq5mtZrI3zyNv/1rK6kqJ\nD08gK2Uq2WNy0GtPfuRVk0qlWkGMEosSpHS6Tk/vCyGEK0lAFgFFNak89NX9rPrxzbbXSuoOsmz7\nUgDmXvgoZXWlLN/xAp8d+MRuwG4f0EvrSohVYslKmcqCcYs7BPTW+0nQFkK4gsZms9m8dfMjR054\n69YuER0d4fe/gz9rX/+1TbXc+/ndbCzbwAlTrd3jw/ThRARHUKGW233/roxZ3H32b3nhh+dYkb+8\n0/sZg0ey7sb1HYJ27v6POFRXRlx4PFNSrmwL6v0hUMvn37uk/p2z5+huP
j3wMZcOv4IzBo1w+jp9\nqf/o6Ai7r0tA7gP5g/Cu6OgI9pWW8af197Fm7+o+X0+n0WGz2dBoNFhsFrvH3JnxKxaN/ztzNzzA\n8h0vdHr/rozZ6LW6HrvLA4F8/r1L6r8j1aRSXHuARnMDoMGgN5AUObztC/GxxmOMfGUETZbGtnNC\ndAZ23LGbgYaBvb6fBGQfI38Q3mO2mlm07S+89N+XqDPVeey+Q8Pi+OKmjfzs9VF27xukDcJkNXV6\nffaoe1gwbpEniugx8vn3Lqn/FrVNtdz/5e9YdyCXxnbBFsCgNXBz6nTmX/RX0lam0Gxt7nR+iM5A\nyd2He31fdwRkmWUt/FL25nk8s+UZjwZjgMP1lWw7/N8u72svGAPkFeXKbG8hXMhsNfPQV3/k9JcS\n+GDfu52CMUCjtZGXC1Yw4qVhdoMxQJOlkT1Hd7u7uA6RgCz8jmpSydu/1iv3tmBh1c7Xe33eobpS\nimuLZBmWEC7yyKaHWZH/okPHWrF2+/6nBz52RZH6LLAGtUS/UKlWUFZX6rX7f7B/Ta/PCdEamPbh\n9VSqFcSGDeWK5KnkjFsUcOPKQniCalJ5s/A1l13v0uFXuOxafSEtZOF3YpRY4sMTvF2MXlEt9VSo\n5diwUV5/iJX5LzJx1UXUNtmfES6E6FpxbRGqxTU9TSE6Q59mW7uSBGThd5QghayUqd4uRp/tPrab\ns18+k/kb52C2mr1dHCH8RqO5ySXXaZ1l7Sukv0z4pewxOYSGBv9/e/cfFVWd9wH8PcMMzdwZUGcQ\nECECn1V+iT82D4VBUub6oyxrR0efeFrbzefZzuMpnzYx8iC2DQEVp3o0bdfYbXdRfuWpczbaqHU1\nTbS103k4ofRk+vgDBB0BdRgIxuH5w+McMVRmmDv3cuf9+su5c++Hjx8v83Hu/X6/FzsPf4BWx2lM\nMMRgakQ6po2fgX8ZNwW/qs+VOsVh6b7c7Vm0RGmjsInEotPoRnR8RvRdKMvZJJtvxldx2tMIcNqB\ntMaPD8OJ1nbPAhyhIaEo+OIF7DjyF3S7uqVOzytxYfHYaz04qhYR4fkvrdFa//M9dhw404BLvRcQ\nqtEhYUwippiSvDr3nf1OpJZPQvdl737PQ1Qh+LfUJ/0yfkOMaU/8hkyjmqAVkDAmEQCwfl/ekIt1\njAatjtNod7Z5/i5EStPr6sX82hwc7mj60XtqqPGLtF/i5WE2SkErYHnK417/vj+RuhLF2a97dUwg\nsSGTIjj7nag79lep0/BZjDEWUUK01GkQiWZ+7X1DNmPgyrSk8m9+D7VKjaKsV4cV76XZr8A94Maf\nmv4A18CNx2CooEJs2O1YkLAQhZk2n3IPFDZkUoR2ZxtaHS1Sp+GzBQkLR9XlaiJvnO+x43DHN7fc\nb8eRv2D9XRuH9bugUWtQnP06Cu7+Lb7taEarowUmnQm9l39Aq+MU7o27DyadeVStKc+GTIoQJUQj\nxjgRpx2npE7FKxqVBk9OfUr2/3On4HJ1XWhgAPHhCSNuZgda9w9rv25XN05cPI5kc+qwYwtaATOi\nZmJG1Mwh3x9Nt4HYkEkRBK2AhYkPekYsjxZ1j36K6VE/lToNCnJXGvBxuNyX8fvGLfjw6E70XO4B\nAAghApYmWVGU9ZrvA6FUKm929u1nKAAbMilGYaYN7gE3KpsrAr7GtS/iwuIx2ZQsdRoUxFxuF/L3\n/gbVzZU3XGjDedmJPzaV48szB/HZ0r0+NeW7Jtw9rP0MGgPiw+/wOr5ScGEQUgyNWoOirFfx0aOf\nivpz4oxxfokzN37eqLivRcrkcrvwQE02/thUPqxVrw53NOH+6nt8WsTGrI9AiunWl6GXJz8e1L8T\nbMikOPHhCYgz3i5K7LSIqSif7/3DJYbyq6n/7pc4RL5Y9/lzaDp/64FW1zrScRjr96316ef97ef/\nQIopbcj3NNDgybSn8NLsV3yKrRS8Z
E2Kc3VpTX/eT37g9p+hLOe/EWWIhrPfCaPWOKLL4nFh8aNu\nPW5SDme/E9uP+PZwho+OfYSCu1/2+pusTqPDbuv+KwuDtO7Hpf6LCFX7tjCIUrEhkyJdHbX88fE6\ntDpOI1qIQc/lbnT0dvgU7z+m/yeiDFfmCQtaAdakfx3RIiSc5kRS+rbjyE3n7t7MWWfbiBaxMesj\nsGjSYp+OVTo2ZFIkjVqDl+8pQX7GBs88xB6XE8l/8P5DJEQVgpTrpmG8NPsVXOq7hKpvt3sVa4Ih\nBg9NepjTnEhS53vsPh8bbZjARWxEwoZMinbt0ppftf/TpxjJ5hSY9RGDtmnUGpRkl2Hf6c/R0j28\nZzNH6aOxa+m+H8UiCrQZI5hqNzPqTl7dEQkHdVHQSDGnQu3FKa+CCmkRU1H36N+HfF/QClg06aFh\nx3v4J0vYjEkWzPoIJI8b/uIb1yrOes3P2dBVbMgUNMz6CKRE3PhDyKgNQ4gqBBMME7Eo4SE0PvG/\n2LX0i5s+6q0w04ZV6b9GXFg8QlQhiDXGwaQzQXXN4gYatRZPpnE1LpKXTyz/gEln9uqYtIipnrEU\n5H8jevyi3W7HggULsGnTJmRkZKC5uRmFhYUAgClTpmDjxo03PX40PjrsWqP18WdK4Uv9e129WPD+\nfYOme2jVWuSmrER+RgHO99p9WvfW2e8ctGbu+R47vj77Fcy68YodQcrzX1r+qL/L7cJvdj+DHc0V\nGID7hvupoUZKRCrqHv37iJ9FrBRiPH5xRA157dq1OHr0KPLy8pCRkYHc3Fw8//zzSE9Px3PPPYfF\nixfj3nvvveHxo/2XmR9I0hpJ/c/32PF1+1cw6yMwxZSsyIYpNp7/0vJn/Z39TnzbcQSt3a0w6UzQ\nheig0+gRFhqG4xeOIcWcytst15HV85AbGhpgMBgwefJkAEBfXx9aWlqQnp4OAMjJyUFDQ8NNGzKR\nVMz6CMy942dSp0EkC1ce0PBTzMCPB3vFhvlnZTq6NZ/uIff19WHz5s1Ys2aNZ1tnZyfCw8M9r81m\nM86dOzfyDImIiILALb8h19TUoKamZtC27OxsWCyWQQ34esO5Ej5unACNJmQYacrXjS49UGCw/tJi\n/aXF+kvL3/W/ZUO2WCywWCyDtlmtVrjdblRUVODkyZNobGxEWVkZurq6PPu0t7cjMjLyprE7O2+9\noLmc8R6atFh/abH+0mL9pSWbe8iVlZWeP69btw5LlixBUlISEhMTcejQIdx5552or69Hbm6uT8kS\nEREFG7+u1JWfn4+CggK43W5MmzYNmZmZ/gxPRESkWCOa9jRSo/1yCy8ZSYv1lxbrLy3WX1piXLLm\nSl1EREQywIZMREQkA2zIREREMsCGTESK5+x34viFY3D2Dz3Vcqj3b3UMkb/xechEpFgutwuF+1/E\nx8c+QovjNCYaY/HAHfPwePIvEKLWYKIxFqX/tKHu2F/R6mhBjHEi5icsRL/bhfr/+xjt3W2YaIzF\ngsRFKMy0QaPmRyaJh6OsR4CjHKXF+ktrNNR//b48/K5xyw3f16g0cA24hhVrZdpTKMl+3V+pjdho\nqL+ScZQ1EdEwXfzhIrYf/vNN9xluMwaAPzWVY93n/wWXe/jHEHmDDZmIFOnFfWvhcDn8Fu/ywGWU\nf7MNhftf9FtMomuxIROR4jj7nfiiZa8osT8+XseBXiQKNmQiUpx2ZxtaHS2ixG51nEa7s02U2BTc\n2JCJSHGihGiY9RGixI4xxiJKiBYlNgU3NmQiUhxBK2B+wkJRYi9IWAhBK4gSm4IbGzIRKVJJdhnU\nfvyIm2CYiFXpv0Zhps1vMYmuxVnuRKRIF37oghtuv8UruqcEiyYt9ls8ouvxGzIRKdLXZ7/yazze\nNyaxsSETkSIZNEa/xqv9rtKv8Yiux4ZMRIo0VjfWr/E+PfEp5x+TqNiQiUiR4sMToFfr/RaP849J\nb
GzIRKRIglbAsuTlfovH+cckNjZkIlKsoqzXkBYx1S+xOP+YxMaGTESKpVFrUP/zPViZ9hQi9ZE+\nxTBojJx/TAHBhkxEiqZRa1CS/Tr2WA8gSn/jS84qqJBsSkGMIRZqqBFjmIhlU1bgf55oxsv3lECj\n5rINJC6eYUQUFMz6CDz8kyX4XeOWId9fmfZLFGeXwdnvRLuzDVFCNC9RU0CxIRNR0CjMtME94EZl\n83Y4+i8BAIzaMFiTVuCl2a8AuDIYLGFMopRpUpBiQyaioKFRa1CU9SrW37URJy4eB6BCfPgd/CZM\nssCGTERBR9AKSDanSp0G0SAc1EVERCQDbMhEREQywIZMREQkA2zIREREMsCGTEREJANsyERERDLA\nhkxERCQDbMhEREQyoBoYGBiQOgkiIqJgx2/IREREMsCGTEREJANsyERERDLAhkxERCQDbMhEREQy\nwIZMREQkA2zIPrLb7Zg1axYOHjwIAGhubobVaoXVasWGDRskzk65XC4X8vLysHz5cixduhSHDh0C\nwPoHWlFREZYtWwar1YrGxkap0wkKpaWlWLZsGR577DHU19fjzJkzyM3NxYoVK/DMM8+gr69P6hQV\nr7e3F3PnzsXOnTtFqT8bso9KS0sRFxfneW2z2ZCfn4/Kyko4HA7s2bNHwuyU68MPP4Rer8eOHTtg\ns9lQXFwMgPUPpC+//BInTpxAVVUVbDYbbDab1Ckp3oEDB/Ddd9+hqqoK27ZtQ1FREd566y2sWLEC\n27dvR3x8PGpra6VOU/G2bNmCMWPGAIAo9WdD9kFDQwMMBgMmT54MAOjr60NLSwvS09MBADk5OWho\naJAyRcVavHgxXnjhBQCAyWRCV1cX6x9gDQ0NmDt3LgBg0qRJuHDhAhwOh8RZKdusWbPw5ptvAgDC\nw8PR09ODgwcP4v777wfAcz4Qvv/+exw9ehRz5swBAFHqz4bspb6+PmzevBlr1qzxbOvs7ER4eLjn\ntdlsxrlz56RIT/G0Wi1uu+02AMB7772HBx98kPUPMLvdjnHjxnlem0wm1ltkISEhEAQBAFBbW4vs\n7Gz09PQgNDQUAM/5QCgpKcG6des8r8Wov2bEERSspqYGNTU1g7ZlZ2fDYrEMagDX42qk/jFU/Vev\nXo2srCxUVFSgqakJW7duRUdHx6B9WP/AYr0D57PPPkNtbS3Ky8sxb948z3b+G4jrgw8+wPTp0wfd\npryWv+rPhnwTFosFFotl0Dar1Qq3242KigqcPHkSjY2NKCsrQ1dXl2ef9vZ2REZGBjpdxRmq/sCV\nRr1r1y68/fbb0Gq1nkvXV7H+4oqMjITdbve8Pnv2LMaPHy9hRsFh79692Lp1K7Zt24awsDAIgoDe\n3l7odDqe8yLbvXs3Tp06hd27d6OtrQ2hoaGi1J+XrL1UWVmJ6upqVFdXY86cOdiwYQOSkpKQmJjo\nGfFbX1+PrKwsiTNVplOnTqGyshKbNm3yXLrWarWsfwDNnj0bn3zyCQCgqakJkZGRMBqNEmelbJcu\nXUJpaSneeecdjB07FgCQmZnp+XfgOS+uN954A++//z6qq6thsVjw9NNPi1J/fkP2k/z8fBQUFMDt\ndmPatGnIzMyUOiVFqqmpQVdXF1atWuXZ9u6777L+ATRz5kykpqbCarVCpVJxmlkA1NXVobOzE88+\n+6xnW3FxMdavX4+qqirExMTgkUcekTDD4LN69Wrk5eX5tf58/CIREZEM8JI1ERGRDLAhExERyQAb\nMhERkQywIRMREckAGzIREZEMsCETERHJABsyERGRDLAhExERycD/A2+ElSKeW1yWAAAAAElFTkSu\nQmCC\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"metadata":{"id":"EFxHlHHIGNz0","colab_type":"text"},"cell_type":"markdown","source":["#### PCA"]},{"metadata":{"id":"E63htjuTyBgN","colab_type":"code","outputId":"ebf46e1b-7731-4612-a3d4-e0053b413d89","executionInfo":{"status":"ok","timestamp":1551149280781,"user_tz":-300,"elapsed":806,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["X_train.shape"],"execution_count":113,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(984, 1482)"]},"metadata":{"tags":[]},"execution_count":113}]},{"metadata":{"id":"HyF73HzlDzWE","colab_type":"code","colab":{}},"cell_type":"code","source":["n_components = min(X_train.shape[0],X_train.shape[1])\n","pca = PCA(n_components=n_components, svd_solver='randomized',\n"," whiten=True).fit(X_train)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"BdABBY9l9f7Y","colab_type":"code","outputId":"e994dfa1-23f1-4eff-e319-dcf94604df88","executionInfo":{"status":"ok","timestamp":1551149289525,"user_tz":-300,"elapsed":6014,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":368}},"cell_type":"code","source":["plt.bar(range(n_components),pca.explained_variance_ratio_,1,log=True)"],"execution_count":115,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":115},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAewAAAFNCAYAAADCalwrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAHRdJREFUeJzt3X9sG+Xhx/GP25tVilMlKTasaJtG\nIsHkprCJ/fBSwthcmETXrzYpxBthssoEW5nSqVQQoiphSxuaUiZQKrTJFAUlo/VapvFDE54mKGKa\nR1SCEhJpisggdGFgm4aQxFDact8/vpu/hKR14p5rP/H7Je2Pe2rfPfco05s7XxyXbdu2AABAUVtW\n6AkAAIDsCDYAAAYg2AAAGIBgAwBgAIINAIABCDYAAAYg2AAAGIBgAwBggPMa7EQioa1bt+rQoUPn\n87AAABjPcmInIyMj2rJli8LhsBobGyVJHR0dGhgYkMvlUktLi9atW6dly5apoaFB4+PjC9pvMjnl\nxPRmqahYqYmJtOP7LTWso3NYS2ewjs5gHZ2Ty1p6vWVn/LdzvsJOp9Nqb29XIBDIjPX19WlsbEzR\naFS7du3Srl27JEkXXXSRli9ffq6HPCeWVdjjLxWso3NYS2ewjs5gHZ3j9Fqec7DdbrcikYh8Pl9m\nLB6PKxgMSpKqqqo0OTmp6enpcz0UAAAl65xviVuWJcuavZtUKiW/35/ZrqysVDKZ1KuvvqoDBw5o\nampK5eXl2rBhw1n3XVGxMi//tXe2Ww5YONbROaylM1hHZ7COznFyLR35DDub//5BsEAgMOvWeTb5\n+BzF6y3Ly2fjpYZ1dA5r6QzW0Rmso3NyWcu8foY9H5/Pp1QqldlOJBLyer35OBQAACUhL8Gura1V\nLBaTJA0PD8vn88nj8eTjUAAAlIRzviU+NDSkzs5OjY+Py7IsxWIxdXV1ye/3KxQKyeVyqa2tzYm5\nAgBQss452GvXrlVPT8+c8e3bt5/rrgEAwH/w1aQAABiAYAMAYACCDQCAAQg2AAAGKKlgb979XKGn\nAABATkoq2AAAmIpgAwBgAIINAIABCDYAAAYg2AAAGIBgAwBggJIL9vfufLLQUwAAYNFKLtgAAJiI\nYAMAYACCDQCAAUoy2HxFKQDANCUZbAAATFOyweYqGwBgkpINNgAAJinpYHOVDQAwRUkHWyLaAAAz\nlHywJaINACh+BBsAAAMQ7P/gKhsAUMys83mwl19+WQcPHtTJkyd16623qqam5nweHgAAY+V0hT0y\nMqJgMKje3t7MWEdHhxoaGhQKhTQ4ODjv+zwej3bu3KnNmzerr68vtxnnEVfZAIBitegr7HQ6rfb2\ndgUCgcxYX1+fxsbGFI1GNTo6qpaWFkWjUXV3d6u/v1+SVF1draamJr3wwgvav3+/du7c6dxZOGjz\n7uf0aPO3Cz0NAA
Bmcdm2bS/mDadOndKpU6cUiURUUVGhxsZGPfTQQ1qzZo3q6+slSd/97nd1+PBh\neTyeWe8dGBjQunXrNDExoX379qm1tTXLsU7LspYv8pTObDF/C/vpB/7HseMCAHCuFn2FbVmWLGv2\n21KplPx+f2a7srJSyWRyTrAnJyfV2tqqdDqtTZs2ZT3WxER6sdNzTDI5VbBjm8DrLWONHMJaOoN1\ndAbr6Jxc1tLrLTvjv+XlobMzXbTX1dWprq4uH4d0HLfGAQDFxJFf6/L5fEqlUpntRCIhr9frxK4L\navPu53gQDQBQFBwJdm1trWKxmCRpeHhYPp9vzu1wkxFtAEChLfqW+NDQkDo7OzU+Pi7LshSLxdTV\n1SW/369QKCSXy6W2trZ8zLWguEUOACikRT8lfj45/eCDU1fKhJsHU5zEWjqDdXQG6+gcIx46W+o+\nGX7iDQA4H/gu8XPEg2kAgPOBK2yHfDraXHkDAJzEFXaecOUNAHASV9h5xpU3AMAJBPs8I+AAgFwQ\n7AKb77Y5EQcAfBrBLkJEHADwaQTbEGd7gI2YA8DSR7CXgIU8jU7UAcBsBLtEZIs6QQeA4kawIWnh\n37NO2AGgMAg2FoWwA0BhEGzkxWK+5Y24A0B2BBsFR9wBIDuCDaMQdwClimBjyVrsH18h8ACKGcEG\n/oMH6gAUM4INLBK35QEUAsEG8mghcX/6gf85DzMBYDqCDRTY9+58MutruFIHQLABA/DHXwAQbMBw\nZ4o5IQeWlvMa7GeeeUbDw8M6fvy4LrvsMt1+++3n8/BASeGqHFhacgr2yMiItmzZonA4rMbGRklS\nR0eHBgYG5HK51NLSonXr1s1538aNG7Vx40bt3btXN99887nNHEDO5os5EQeK26KDnU6n1d7erkAg\nkBnr6+vT2NiYotGoRkdH1dLSomg0qu7ubvX390uSqqur1dTUpNdff12rV6+Wx+Nx7iwAnLNPR5yA\nA8Vl0cF2u92KRCKKRCKZsXg8rmAwKEmqqqrS5OSkpqenFQ6HFQ6HZ73/mWeeUX19/bnNGkDeEXCg\nuCw62JZlybJmvy2VSsnv92e2KysrlUwm572KPnbsmC655JIFHauiYqUsa/lipwggDz4d8GL8/XGv\nt6zQU1gSWEfnOLmWeXnozLbtM/7bnj17FryfiYm0E9MBkAef/P3xYrj69nrLlExOFXoaxmMdnZPL\nWp4t8I4E2+fzKZVKZbYTiYS8Xq8TuwZgAG6fA/m3zImd1NbWKhaLSZKGh4fl8/l4qAwoYZt3P5f5\nHwBnLPoKe2hoSJ2dnRofH5dlWYrFYurq6pLf71coFJLL5VJbW1s+5grAQJ+MNlfeQO5c9tk+cC4w\npz9H4b/2geKQj3Dz2aszWEfnFOVn2ACwGFx1A4vnyGfYAJArPusGFoZgAygKhBs4O4INoKgQbmB+\nBBtAUSLawGwEG0DRItrA/yPYAIoat8iB/0OwARiBaKPUEWwAxiDaKGUEG4BRiDZKFcEGYByijVJE\nsAEAMADBBmAknh5HqSHYAIxGtFEqCDYAAAYg2AAAGIBgAzAet8VRCgg2AAAGINgAloTv3flkoacA\n5BXBBrBkcGscSxnBBgDAAAQbAAADEGwASwq3xbFUEWwASw7RxlKU12AnEglt3bpVhw4dmncbAAAs\nzIKCPTIyomAwqN7e3sxYR0eHGhoaFAqFNDg4OP/Oly1TQ0PDGbcBAMDCWNlekE6n1d7erkAgkBnr\n6+vT2NiYotGoRkdH1dLSomg0qu7ubvX390uSqqur1dTUpNHR0cz7LrroolnbAABgYbIG2+12KxKJ\nKBKJZMbi8biCwaAkqaqqSpOTk5qenlY4HFY4HHZschUVK2VZyx3bH4DS4fWWFXoKxmLtnOPkWmYN\ntmVZsqzZL0ulUvL7/ZntyspKJZNJeTyeWa+Lx+M6cOCApqamVF5eLo/HM2t7w4YN
Zz32xER6MecC\nABnfu/NJPdr87UJPwzheb5mSyalCT2NJyGUtzxb4rMFeCNu25x0PBAKzbqX/dwwAACxOTk+J+3w+\npVKpzHYikZDX63VsUgAAYLacgl1bW6tYLCZJGh4els/nm3M7HAAKjd/HxlKS9Zb40NCQOjs7NT4+\nLsuyFIvF1NXVJb/fr1AoJJfLpba2tvMxVwAASlbWYK9du1Y9PT1zxrdv356XCQEAgLn4alIAAAxA\nsAEAMADBBgDAAAQbwJLGk+JYKgg2AAAGINgAABiAYAMAYACCDWDJ43NsLAUEGwAAAxBsAAAMQLAB\nADAAwQYAwAAEGwAAAxBsAAAMQLABADAAwQYAwAAEGwAAAxBsAAAMQLABADAAwQYAwAAEG0BJ4A+A\nwHQEGwAAAxBsAAAMYOVz54lEQrt27dL69etVX1+vrq4uvf3221q1apU2bdqkL33pS/k8PAAAS8aC\nrrBHRkYUDAbV29ubGevo6FBDQ4NCoZAGBwfn3/myZWpoaJg1tmLFCp08eVI+n+8cpg0AQGnJeoWd\nTqfV3t6uQCCQGevr69PY2Jii0ahGR0fV0tKiaDSq7u5u9ff3S5Kqq6vV1NSk0dHRzPtuuukmlZeX\nK5lM6rHHHtO2bdvycEoAACw9WYPtdrsViUQUiUQyY/F4XMFgUJJUVVWlyclJTU9PKxwOKxwOn3Ff\n//znP/W1r31Nq1at0kcffXTuswcAoERkDbZlWbKs2S9LpVLy+/2Z7crKSiWTSXk8nlmvi8fjOnDg\ngKamplReXi7LstTc3CzLsnTbbbdlnVxFxUpZ1vKFngsAnJXXW1boKRiBdXKOk2vpyENntm3POx4I\nBGbdSpek6667bsH7nZhIn9O8AOCTksmpQk+h6Hm9ZayTQ3JZy7MFPqdf6/L5fEqlUpntRCIhr9eb\ny64AAMAC5BTs2tpaxWIxSdLw8LB8Pt+c2+EAAMA5WW+JDw0NqbOzU+Pj47IsS7FYTF1dXfL7/QqF\nQnK5XGprazsfcwUAoGS57DN9AF0EnP4che8SBvBo87cLPYWixmfYzimKz7ABAMD5RbABADAAwQYA\nwAAEGwAAAxBsAAAMQLABADAAwQYAwAAEGwAAAxBsAAAMQLABADAAwQYAwAAEGwAAAxBsAAAMQLAB\nADAAwQYAwAAEGwAAAxBsAAAMQLABADAAwQYAwAAEGwAAAxBsAAAMQLABADAAwQYAwAAEGwAAA+Q1\n2IlEQlu3btWhQ4ckSX/729/0q1/9Svfcc4+OHTuWz0MDALCkLCjYIyMjCgaD6u3tzYx1dHSooaFB\noVBIg4OD8+982TI1NDRktp9//nk1NzcrHA7riSeeOMepAwBQOqxsL0in02pvb1cgEMiM9fX1aWxs\nTNFoVKOjo2ppaVE0GlV3d7f6+/slSdXV1WpqatLo6GjmfT/84Q/14IMPqry8XMePH8/D6QAAsDRl\nDbbb7VYkElEkEsmMxeNxBYNBSVJVVZUmJyc1PT2tcDiscDh8xn25XC5t2bJFb775pv76179mnVxF\nxUpZ1vIFnAYALIzXW1boKRQ91sg5Tq5l1mBbliXLmv2yVColv9+f2a6srFQymZTH45n1ung8rgMH\nDmhqakrl5eVas2aNduzYIbfbrebm5qyTm5hIL/Q8AGBBksmpQk+hqHm9ZayRQ3JZy7MFPmuwF8K2\n7XnHA4HArFvpkvTggw86cUgAAEpKTk+J+3w+pVKpzHYikZDX63VsUgAAYLacgl1bW6tYLCZJGh4e\nls/nm3M7HAAAOCfrLfGhoSF1dnZqfHxclmUpFoupq6tLfr9foVBILpdLbW1t52OuAACUrKzBXrt2\nrXp6euaMb9++PS8TAgAAc/HVpAAAGIBgAwBgAIINAIABCDYAAAYg2AAAGIBgAwBgAIINAIABCDYA\nAAYg2AAAGIBgAwBgAIINAIABCDYAAAYg2AAA
GIBgAwBgAIINAIABCDYAAAYg2AAAGIBgAwBgAIIN\nAIABCDYAAAYg2AAAGIBgAwBgAIINAIABrHzu/JVXXtGhQ4d0+vRp3XLLLTpx4oQOHjyokydP6tZb\nb1VNTU0+Dw8AwJKxoCvskZERBYNB9fb2ZsY6OjrU0NCgUCikwcHBed93wQUXqK2tTeFwWEePHpXH\n49HOnTu1efNm9fX1OXMGAACUgKxX2Ol0Wu3t7QoEApmxvr4+jY2NKRqNanR0VC0tLYpGo+ru7lZ/\nf78kqbq6Wk1NTZqentbjjz+uO++8U+Xl5XrhhRe0f/9+7dy5M39nBQDAEpM12G63W5FIRJFIJDMW\nj8cVDAYlSVVVVZqcnNT09LTC4bDC4XDmdVNTU7r//vu1bds2lZeXa2BgQHV1daqpqdG+ffvU2trq\n/BkBALAEZQ22ZVmyrNkvS6VS8vv9me3Kykolk0l5PJ5Zr4tEIpqZmdHDDz+sq6++WhdccIFaW1uV\nTqe1adOmrJOrqFgpy1q+0HMBgKy83rJCT6HosUbOcXItHXnozLbtece3bds2Z6yurm7B+52YSOc8\nJwCYTzI5VegpFDWvt4w1ckgua3m2wOf0a10+n0+pVCqznUgk5PV6c9kVAABYgJyCXVtbq1gsJkka\nHh6Wz+ebczscAAA4J+st8aGhIXV2dmp8fFyWZSkWi6mrq0t+v1+hUEgul0ttbW3nY64AAJSsrMFe\nu3atenp65oxv3749LxMCAABz8dWkAAAYgGADAGAAgg0AgAEINgAABiDYAAAYgGADAGAAgg0AgAEI\nNgAABiDYAAAYgGADAGAAgg0AgAEINgAABiDYAAAYgGADAGAAgg0AgAEINgAABiDYAAAYgGADAGAA\ngg0AgAEINgAABiDYAAAYgGADKCmbdz9X6CkAOSHYAAAYgGADAGAAK587f+WVV3To0CGdPn1at9xy\ni9544w0NDw/r+PHjuuyyy3T77bfn8/AAACwZC7rCHhkZUTAYVG9vb2aso6NDDQ0NCoVCGhwcnPd9\nF1xwgdra2hQOh3X06FFt3LhRd999t7xer26++WZnzgAAgBKQ9Qo7nU6rvb1dgUAgM9bX16exsTFF\no1GNjo6qpaVF0WhU3d3d6u/vlyRVV1erqalJ09PTevzxx3XnnXdKkl5//XWtXr1aHo8nT6cEAMDS\nkzXYbrdbkUhEkUgkMxaPxxUMBiVJVVVVmpyc1PT0tMLhsMLhcOZ1U1NTuv/++7Vt2zaVl5dLkp55\n5hnV19cvaHIVFStlWcsXcz4AkJXXW1boKRQ11sc5Tq5l1mBbliXLmv2yVColv9+f2a6srFQymZxz\n1RyJRDQzM6OHH35YV199tW644QYdO3ZMl1xyyYImNzGRXtDrAGAxksmpQk+haHm9ZayPQ3JZy7MF\n3pGHzmzbnnd827Ztc8b27NnjxCEBACgpOf1al8/nUyqVymwnEgl5vV7HJgUAAGbLKdi1tbWKxWKS\npOHhYfl8Ph4iAwAgj7LeEh8aGlJnZ6fGx8dlWZZisZi6urrk9/sVCoXkcrnU1tZ2PuYKAEDJyhrs\ntWvXqqenZ8749u3b8zIhAAAwF19NCgCAAQg2AAAGINgAABiAYAMAYACCDQCAAQg2AAAGINgAABiA\nYAMAYACCDQCAAQg2AAAGINgAABiAYAMAYACCDQCAAQg2AAAGINgAABiAYAMAYACCDQCAAQg2AAAG\nINgAABiAYAMAYACCDQCAAQg2AAAGINgAABjAyufOX375ZR08eFAnT57Urbfeqosvvli7du3S+vXr\nVV9fn89DAwCwpCzoCntkZETBYFC9vb2ZsY6ODjU0NCgUCmlwcHDe93k8Hu3cuVObN29WX1+fli1b\npoaGBmdmDgBACcl6hZ1Op9Xe3q5AIJAZ6+vr09jYmKLRqEZHR9XS0qJoNKru7m719/dLkqqrq9XU\n1KQXXnhB
+/fv186dO3XRRRdpdHQ0f2cDAMASlTXYbrdbkUhEkUgkMxaPxxUMBiVJVVVVmpyc1PT0\ntMLhsMLhcOZ1AwMDqqurU01Njfbt26fW1lbnzwAAgBKQNdiWZcmyZr8slUrJ7/dntisrK5VMJuXx\neGa9bnJyUq2trUqn09q0aZPi8bgOHDigqakplZeXa8OGDWc9dkXFSlnW8sWcDwBk5fWWFXoKRY31\ncY6Ta+nIQ2e2bc87XldXp7q6ulljn7y1ns3ERPqc5gUA80kmpwo9haLl9ZaxPg7JZS3PFvicfq3L\n5/MplUplthOJhLxeby67AgAAC5BTsGtraxWLxSRJw8PD8vl8c26HAwAA52S9JT40NKTOzk6Nj4/L\nsizFYjF1dXXJ7/crFArJ5XKpra3tfMwVAICSlTXYa9euVU9Pz5zx7du352VCAABgLr6aFAAAAxBs\nAAAMQLABADAAwQYAwAAEGwAAAxBsAAAMQLABADAAwQYAwACO/PEPADDJ5t3PFXoKWCIebf72eTsW\nV9gAABiAYAMAYACCDQCAAQg2AAAGINgAABiAYAMAYACCDQCAAQg2AAAGINgAABiAYAMAYACCDQCA\nAQg2AAAGINgAABjAZdu2XehJAACAs+MKGwAAAxBsAAAMQLABADAAwQYAwAAEGwAAAxBsAAAMYBV6\nAudLR0eHBgYG5HK51NLSonXr1hV6SkVvz549evnll3Xq1Cndfvvtqqmp0V133aXTp0/L6/Xq/vvv\nl9vt1lNPPaXHHntMy5Yt00033aT6+vpCT73ofPjhh9q4caO2bNmiQCDAOuboqaee0iOPPCLLstTU\n1KTLL7+ctVykmZkZ3X333ZqcnNTJkyd1xx13yOv16t5775UkXX755frlL38pSXrkkUf07LPPyuVy\n6ec//7muvfbaAs68eIyMjGjLli0Kh8NqbGzUv//97wX/HJ48eVLNzc166623tHz5ct1333363Oc+\nt7AD2yXgpZdesm+77Tbbtm37tddes2+66aYCz6j4xeNx+yc/+Ylt27Z9/Phx+9prr7Wbm5vtP/3p\nT7Zt2/YDDzxg/+53v7NnZmbs66+/3n7//fftDz74wL7xxhvtiYmJQk69KP3617+2f/CDH9hPPPEE\n65ij48eP29dff709NTVlv/POO/aOHTtYyxz09PTYe/futW3btt9++237hhtusBsbG+2BgQHbtm17\n27Zt9pEjR+w333zT/v73v2+fOHHCfvfdd+0bbrjBPnXqVCGnXhRmZmbsxsZGe8eOHXZPT49t2/ai\nfg7/8Ic/2Pfee69t27b94osv2lu3bl3wsUvilng8HlcwGJQkVVVVaXJyUtPT0wWeVXH76le/qoce\nekiStGrVKn3wwQd66aWX9J3vfEeSdN111ykej2tgYEA1NTUqKyvTihUr9JWvfEX9/f2FnHrRGR0d\n1WuvvaZvfetbksQ65igejysQCMjj8cjn86m9vZ21zEFFRYXee+89SdL777+v8vJyjY+PZ+46/ncd\nX3rpJV1zzTVyu92qrKzUpZdeqtdee62QUy8KbrdbkUhEPp8vM7aYn8N4PK4NGzZIkr75zW8u6mez\nJIKdSqVUUVGR2a6srFQymSzgjIrf8uXLtXLlSknS4cOHVVdXpw8++EBut1uStHr1aiWTSaVSKVVW\nVmbex9rO1dnZqebm5sw265ibf/3rX/rwww/105/+VD/60Y8Uj8dZyxzceOONeuutt7RhwwY1Njbq\nrrvu0qpVqzL/zjqenWVZWrFixayxxfwcfnJ82bJlcrlc+uijjxZ2bIfOwSg238a6YH/5y190+PBh\nPfroo7r++usz42daQ9Z2tj/+8Y+66qqrzvgZFeu4OO+995727dunt956Sz/+8Y9nrRNruTBPPvmk\n1qxZo/379+sf//iH7rjjDpWVlWX+nXU8N4tdv8Wsa0kE2+fzKZVKZbYTiY
S8Xm8BZ2SGF198Ub/5\nzW/0yCOPqKysTCtXrtSHH36oFStW6J133pHP55t3ba+66qoCzrq4HDlyRMeOHdORI0f09ttvy+12\ns445Wr16tb785S/Lsix9/vOf14UXXqjly5ezlovU39+v9evXS5KuuOIKnThxQqdOncr8+yfX8fXX\nX58zjrkW8/9pn8+nZDKpK664QidPnpRt25mr82xK4pZ4bW2tYrGYJGl4eFg+n08ej6fAsypuU1NT\n2rNnj37729+qvLxc0v993vLfdfzzn/+sa665RldeeaVeffVVvf/++5qZmVF/f7+uvvrqQk69qDz4\n4IN64okn9Pvf/1719fXasmUL65ij9evX6+9//7s+/vhjTUxMKJ1Os5Y5+MIXvqCBgQFJ0vj4uC68\n8EJVVVXp6NGjkv5/Hb/xjW/oyJEj+uijj/TOO+8okUiourq6kFMvWov5OaytrdWzzz4rSXr++ef1\n9a9/fcHHKZm/1rV3714dPXpULpdLbW1tuuKKKwo9paIWjUbV1dWlL37xi5mx3bt3a8eOHTpx4oTW\nrFmj++67T5/5zGf07LPPav/+/XK5XGpsbNSmTZsKOPPi1dXVpUsvvVTr16/X3XffzTrm4ODBgzp8\n+LAk6Wc/+5lqampYy0WamZlRS0uL3n33XZ06dUpbt26V1+tVa2urPv74Y1155ZW65557JEk9PT16\n+umn5XK59Itf/EKBQKDAsy+8oaEhdXZ2anx8XJZl6eKLL9bevXvV3Ny8oJ/D06dPa8eOHXrjjTfk\ndru1e/duffazn13QsUsm2AAAmKwkbokDAGA6gg0AgAEINgAABiDYAAAYgGADAGAAgg0AgAEINgAA\nBiDYAAAY4H8BjBhbMrH6Rm0AAAAASUVORK5CYII=\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"metadata":{"id":"AHh90qdm_FxG","colab_type":"code","colab":{}},"cell_type":"code","source":["n_components = 630\n","pca = PCA(n_components=n_components, svd_solver='randomized',\n"," whiten=True).fit(X_train)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"rsLmv4HxFEwl","colab_type":"code","colab":{}},"cell_type":"code","source":["X_train_pca = pca.transform(X_train)\n","X_test_pca = pca.transform(X_test)"],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"text","id":"9uVHuupBb03r"},"cell_type":"markdown","source":["#### MODEL"]},{"metadata":{"id":"GjKD1mDybdML","colab_type":"code","outputId":"b9711a34-a120-4d1c-9026-802913662c46","executionInfo":{"status":"ok","timestamp":1551149374329,"user_tz":-300,"elapsed":1396,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":72}},"cell_type":"code","source":["clf = svm.OneClassSVM(nu=0.05,kernel='rbf')\n","clf = clf.fit(X_train_pca)"],"execution_count":142,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/sklearn/svm/base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. 
Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n"," \"avoid this warning.\", FutureWarning)\n"],"name":"stderr"}]},{"metadata":{"id":"Ueb80q1RGoH9","colab_type":"code","colab":{}},"cell_type":"code","source":["y_pred = clf.predict(X_test_pca) #pca ones here also"],"execution_count":0,"outputs":[]},{"metadata":{"id":"DWWN_EPF4avN","colab_type":"code","colab":{}},"cell_type":"code","source":["y_pred[y_pred == -1] = 0"],"execution_count":0,"outputs":[]},{"metadata":{"id":"GZ-lk9TVGozA","colab_type":"code","outputId":"665a3909-9b89-403b-8272-e86fcf69683f","executionInfo":{"status":"ok","timestamp":1551149376229,"user_tz":-300,"elapsed":782,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":295}},"cell_type":"code","source":["print(\"Phone: \",phone)\n","print(classification_report(y_test, y_pred, target_names=['Mispronounced','Correct']))\n","print(confusion_matrix(y_test, y_pred, labels=range(2)))\n","\n","print(sklm.roc_curve(y_test,y_pred))\n","print('FAR = ', sklm.roc_curve(y_test,y_pred)[0][1]*100,'%')\n","print('FRR = ', (1 - sklm.roc_curve(y_test,y_pred)[1][1])*100,'%')\n","print(\"AUC_SCORE = \", sklm.roc_auc_score(y_test,y_pred)*100,'%')"],"execution_count":145,"outputs":[{"output_type":"stream","text":["Phone: L\n"," precision recall f1-score support\n","\n","Mispronounced 0.57 0.75 0.65 123\n"," Correct 0.57 0.38 0.45 109\n","\n"," micro avg 0.57 0.57 0.57 232\n"," macro avg 0.57 0.56 0.55 232\n"," weighted avg 0.57 0.57 0.56 232\n","\n","[[92 31]\n"," [68 41]]\n","(array([0. , 0.25203252, 1. ]), array([0. , 0.37614679, 1. 
]), array([2, 1, 0]))\n","FAR = 25.203252032520325 %\n","FRR = 62.38532110091744 %\n","AUC_SCORE = 56.205713433281126 %\n"],"name":"stdout"}]},{"metadata":{"id":"OBvvHqpdpdNv","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![Contributors][contributors-shield]]() [![LinkedIn][linkedin-shield]][linkedin-url] 3 | 4 | 5 | 6 | 7 |
8 |

9 | 10 | Logo 11 | 12 |

13 | 14 |

Pronunciation Verification using Anomaly Detection

15 |

16 | An MS thesis @ITU 17 |

18 | 19 | 20 | ## Table of Contents 21 | 22 | * [Abstract](#abstract) 23 | * [Frameworks](#frameworks) 24 | * [Getting Started](#getting-started) 25 | * [Prerequisites](#prerequisites) 26 | * [Installation](#installation) 27 | * [Usage](#usage) 28 | * [Contributing](#contributing) 29 | * [Contact](#contact) 30 | * [Acknowledgements](#acknowledgements) 31 | 32 | 33 | 34 | 35 | ## Abstract 36 | 37 | 38 | The automatic evaluation of speech is a powerful tool in second language acquisition. Computer-aided pronunciation training (CAPT) systems aim to automatically detect incorrectly pronounced words and quantify the quality of the pronunciations - which arguably is one of the hardest tasks in the speech processing domain. The lack of sufficient mispronunciation-tagged data seriously impedes the accurate detection of incorrect pronunciations. To handle this problem, in this paper, we have used the approach of tackling the phone-level pronunciation verification problem as an anomaly detection problem using two different techniques, representation learning and an auto-encoder technique, in a semi-supervised learning set-up. The Representation learning model uses a Deep Convolutional Neural Network (DCNN) to learn a rich set of discriminative features through a classification task, while the Auto-encoder model uses a Convolutional Neural Network-Deep Auto-encoder (CNN-DAE) architecture to learn to reconstruct the correct pronunciations with the least error. Previously, [2] used the anomaly detection method at the frame level with a hybrid model - a combination of deep and shallow architectures. The advantage of using a deep architecture as opposed to a shallow or a hybrid architecture is that we can train our models in an end-to-end fashion using a task-specific loss function instead of using hand-crafted features or feature extractors before a shallow learner without a task-specific loss function. 
Posing the pronunciation verification problem as an anomaly detection problem allows us to train our models using only the positive (correct) class. We have experimented with two different architectures and a number of combinations of different data-sets. We also contribute a force-aligned data-set containing isolated speech from professional English speakers of the Cambridge dictionary [3], along with the results of our mispronunciation detection models on this data-set. We have achieved an F1-score of 75% on this data-set. For cross corpus evaluation, we have selected the challenging L2-Arctic corpus [4], and have achieved an F1-score of 24% using phone level processing and targeting all three types of pronunciation errors, which is near to the best frame level F1-score of 29% on this data-set with substitution errors only. 39 | 40 | ### Frameworks 41 | 42 | * [Python](https://python.org) 43 | * [Google Colab](https://colab.research.google.com) 44 | * [Jupyter Notebooks](https://jupyter.org/) 45 | 46 | 47 | 48 | ## Getting Started 49 | 50 | Each of the notebooks in the repository represents a stand-alone experiment. 51 | 52 | ### Prerequisites 53 | 54 | * Python 3.6 or higher 55 | ```sh 56 | sudo apt-get install python3 57 | ``` 58 | * jupyter notebook 59 | ```sh 60 | pip install jupyter 61 | ``` 62 | 63 | ### Installation 64 | 65 | 1. Clone the repo 66 | ```sh 67 | git clone https://github.com/JawadAr/Pronunciation-verification-using-anomaly-detection-Thesis.git 68 | ``` 69 | 2. Install Python and Jupyter 70 | ```sh 71 | sudo apt-get install python3 72 | ``` 73 | 3. Run the cells of the notebook 74 | 75 | 76 | 77 | 78 | ## Usage 79 | 80 | Every notebook has been named as follows: 81 | 82 | ***experimentLevel_modelType_features_trainingSet_testSet*** 83 | 84 | 85 | 86 | ## Contributing 87 | 88 | Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**. 89 | 90 | 1. Fork the Project 91 | 2. 
Create your Feature Branch (`git checkout -b feature/AmazingFeature`) 92 | 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`) 93 | 4. Push to the Branch (`git push origin feature/AmazingFeature`) 94 | 5. Open a Pull Request 95 | 96 | 97 | 98 | 99 | ## References 100 | 101 | [1] S. M. Witt et al., “Use of speech recognition in computer-assisted language learning,” 1999. 102 | [2] M. Shahin, B. Ahmed, J. X. Ji, and K. Ballard, “Anomaly detection approach for pronunciation verification of disordered speech using speech attribute features,” in Proc. Interspeech 2018, 2018, pp. 1671–1675. [Online]. Available: http://dx.doi.org/10.21437/Interspeech.2018-1319 103 | [3] “English dictionary, translations & thesaurus.” [Online]. Available: https://dictionary.cambridge.org/ 104 | 105 | [4] G. Zhao, S. Sonsaat, A. Silpachai, I. Lucic, E. Chukharev-Hudilainen, J. Levis, and R. Gutierrez-Osuna, “L2-arctic: A non-native English speech corpus,” in Proc. Interspeech 2018, 2018, pp. 2783–2787. [Online]. Available: http://dx.doi.org/10.21437/Interspeech.2018-1110 106 | 107 | 108 | 109 | 110 | ## Contact 111 | 112 | Jawad Arshad - jawad.arshad@itu.edu.pk 113 | 114 | Project Link: [https://github.com/JawadAr/Pronunciation-verification-using-anomaly-detection-Thesis](https://github.com/JawadAr/Pronunciation-verification-using-anomaly-detection-Thesis) 115 | 116 | 117 | 118 | 119 | ## Acknowledgements 120 | 121 | I am very thankful to Dr. Agha Ali for being very kind and supportive throughout my thesis. 
122 | I thank all the members of [CSaLT](http://csalt.itu.edu.pk/) for all the motivational and technical help, special thanks 123 | to Haris Bin Zia - he was the goto guy for every problem I faced during the learning phases 124 | 125 | * [GitHub Emoji Cheat Sheet](https://www.webpagefx.com/tools/emoji-cheat-sheet) 126 | * [Img Shields](https://shields.io) 127 | * [Choose an Open Source License](https://choosealicense.com) 128 | * [GitHub Pages](https://pages.github.com) 129 | 130 | 131 | 132 | [build-shield]: https://img.shields.io/badge/build-passing-brightgreen.svg?style=flat-square 133 | [contributors-shield]: https://img.shields.io/badge/contributors-1-orange.svg?style=flat-square 134 | [license-shield]: https://img.shields.io/badge/license-MIT-blue.svg?style=flat-square 135 | [license-url]: https://choosealicense.com/licenses/mit 136 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=flat-square&logo=linkedin&colorB=555 137 | [linkedin-url]: https://www.linkedin.com/in/jawad-arshad-5b2438166/ 138 | [product-screenshot]: https://github.com/JawadAr/Pronunciation-verification-using-anomaly-detection-Thesis/blob/master/download.png 139 | -------------------------------------------------------------------------------- /Train Val Test and Train scalings for Cambridge Big - LIBROSA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "'D:\\\\Thesis Work\\\\InitialDemoWork\\\\Cambridge_bigger USING LIBROSA'" 12 | ] 13 | }, 14 | "execution_count": 3, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "pwd" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import numpy as np" 30 | ] 31 | }, 32 | { 33 | "cell_type": 
"code", 34 | "execution_count": 4, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "(41,)" 41 | ] 42 | }, 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "phones = np.load('./phoneList.npy',allow_pickle=True)\n", 50 | "phones.shape" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 9, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "Phone: AA\n", 63 | "Total X shape: (3889, 13, 50, 3)\n", 64 | "Shape of X_train: (2723, 13, 50, 3)\n", 65 | "Shape of X_val: (388, 13, 50, 3)\n", 66 | "Shape of X_test: (778, 13, 50, 3)\n", 67 | "\n", 68 | "\n", 69 | "Phone: sp\n", 70 | "Total X shape: (20712, 13, 50, 3)\n", 71 | "Shape of X_train: (14499, 13, 50, 3)\n", 72 | "Shape of X_val: (2071, 13, 50, 3)\n", 73 | "Shape of X_test: (4142, 13, 50, 3)\n", 74 | "\n", 75 | "\n", 76 | "Phone: R\n", 77 | "Total X shape: (10364, 13, 50, 3)\n", 78 | "Shape of X_train: (7255, 13, 50, 3)\n", 79 | "Shape of X_val: (1036, 13, 50, 3)\n", 80 | "Shape of X_test: (2073, 13, 50, 3)\n", 81 | "\n", 82 | "\n", 83 | "Phone: EY\n", 84 | "Total X shape: (3786, 13, 50, 3)\n", 85 | "Shape of X_train: (2651, 13, 50, 3)\n", 86 | "Shape of X_val: (378, 13, 50, 3)\n", 87 | "Shape of X_test: (757, 13, 50, 3)\n", 88 | "\n", 89 | "\n", 90 | "Phone: B\n", 91 | "Total X shape: (4367, 13, 50, 3)\n", 92 | "Shape of X_train: (3057, 13, 50, 3)\n", 93 | "Shape of X_val: (436, 13, 50, 3)\n", 94 | "Shape of X_test: (874, 13, 50, 3)\n", 95 | "\n", 96 | "\n", 97 | "Phone: IY\n", 98 | "Total X shape: (8690, 13, 50, 3)\n", 99 | "Shape of X_train: (6083, 13, 50, 3)\n", 100 | "Shape of X_val: (869, 13, 50, 3)\n", 101 | "Shape of X_test: (1738, 13, 50, 3)\n", 102 | "\n", 103 | "\n", 104 | "Phone: AH\n", 105 | "Total X shape: (21038, 13, 50, 3)\n", 106 | "Shape of X_train: (14727, 13, 50, 3)\n", 107 | "Shape of X_val: (2103, 
13, 50, 3)\n", 108 | "Shape of X_test: (4208, 13, 50, 3)\n", 109 | "\n", 110 | "\n", 111 | "Phone: AE\n", 112 | "Total X shape: (4927, 13, 50, 3)\n", 113 | "Shape of X_train: (3449, 13, 50, 3)\n", 114 | "Shape of X_val: (492, 13, 50, 3)\n", 115 | "Shape of X_test: (986, 13, 50, 3)\n", 116 | "\n", 117 | "\n", 118 | "Phone: K\n", 119 | "Total X shape: (9416, 13, 50, 3)\n", 120 | "Shape of X_train: (6592, 13, 50, 3)\n", 121 | "Shape of X_val: (941, 13, 50, 3)\n", 122 | "Shape of X_test: (1883, 13, 50, 3)\n", 123 | "\n", 124 | "\n", 125 | "Phone: S\n", 126 | "Total X shape: (12562, 13, 50, 3)\n", 127 | "Shape of X_train: (8794, 13, 50, 3)\n", 128 | "Shape of X_val: (1256, 13, 50, 3)\n", 129 | "Shape of X_test: (2512, 13, 50, 3)\n", 130 | "\n", 131 | "\n", 132 | "Phone: sil\n", 133 | "Total X shape: (3862, 13, 50, 3)\n", 134 | "Shape of X_train: (2704, 13, 50, 3)\n", 135 | "Shape of X_val: (386, 13, 50, 3)\n", 136 | "Shape of X_test: (772, 13, 50, 3)\n", 137 | "\n", 138 | "\n", 139 | "Phone: F\n", 140 | "Total X shape: (3568, 13, 50, 3)\n", 141 | "Shape of X_train: (2498, 13, 50, 3)\n", 142 | "Shape of X_val: (356, 13, 50, 3)\n", 143 | "Shape of X_test: (714, 13, 50, 3)\n", 144 | "\n", 145 | "\n", 146 | "Phone: T\n", 147 | "Total X shape: (13505, 13, 50, 3)\n", 148 | "Shape of X_train: (9454, 13, 50, 3)\n", 149 | "Shape of X_val: (1350, 13, 50, 3)\n", 150 | "Shape of X_test: (2701, 13, 50, 3)\n", 151 | "\n", 152 | "\n", 153 | "Phone: L\n", 154 | "Total X shape: (12399, 13, 50, 3)\n", 155 | "Shape of X_train: (8680, 13, 50, 3)\n", 156 | "Shape of X_val: (1239, 13, 50, 3)\n", 157 | "Shape of X_test: (2480, 13, 50, 3)\n", 158 | "\n", 159 | "\n", 160 | "Phone: OW\n", 161 | "Total X shape: (2691, 13, 50, 3)\n", 162 | "Shape of X_train: (1884, 13, 50, 3)\n", 163 | "Shape of X_val: (269, 13, 50, 3)\n", 164 | "Shape of X_test: (538, 13, 50, 3)\n", 165 | "\n", 166 | "\n", 167 | "Phone: N\n", 168 | "Total X shape: (14294, 13, 50, 3)\n", 169 | "Shape of X_train: (10006, 13, 50, 
3)\n", 170 | "Shape of X_val: (1429, 13, 50, 3)\n", 171 | "Shape of X_test: (2859, 13, 50, 3)\n", 172 | "\n", 173 | "\n", 174 | "Phone: D\n", 175 | "Total X shape: (7424, 13, 50, 3)\n", 176 | "Shape of X_train: (5197, 13, 50, 3)\n", 177 | "Shape of X_val: (742, 13, 50, 3)\n", 178 | "Shape of X_test: (1485, 13, 50, 3)\n", 179 | "\n", 180 | "\n", 181 | "Phone: M\n", 182 | "Total X shape: (6341, 13, 50, 3)\n", 183 | "Shape of X_train: (4439, 13, 50, 3)\n", 184 | "Shape of X_val: (634, 13, 50, 3)\n", 185 | "Shape of X_test: (1268, 13, 50, 3)\n", 186 | "\n", 187 | "\n", 188 | "Phone: SH\n", 189 | "Total X shape: (2828, 13, 50, 3)\n", 190 | "Shape of X_train: (1980, 13, 50, 3)\n", 191 | "Shape of X_val: (282, 13, 50, 3)\n", 192 | "Shape of X_test: (566, 13, 50, 3)\n", 193 | "\n", 194 | "\n", 195 | "Phone: W\n", 196 | "Total X shape: (1839, 13, 50, 3)\n", 197 | "Shape of X_train: (1288, 13, 50, 3)\n", 198 | "Shape of X_val: (183, 13, 50, 3)\n", 199 | "Shape of X_test: (368, 13, 50, 3)\n", 200 | "\n", 201 | "\n", 202 | "Phone: EH\n", 203 | "Total X shape: (5788, 13, 50, 3)\n", 204 | "Shape of X_train: (4052, 13, 50, 3)\n", 205 | "Shape of X_val: (578, 13, 50, 3)\n", 206 | "Shape of X_test: (1158, 13, 50, 3)\n", 207 | "\n", 208 | "\n", 209 | "Phone: V\n", 210 | "Total X shape: (2738, 13, 50, 3)\n", 211 | "Shape of X_train: (1917, 13, 50, 3)\n", 212 | "Shape of X_val: (273, 13, 50, 3)\n", 213 | "Shape of X_test: (548, 13, 50, 3)\n", 214 | "\n", 215 | "\n", 216 | "Phone: IH\n", 217 | "Total X shape: (13094, 13, 50, 3)\n", 218 | "Shape of X_train: (9166, 13, 50, 3)\n", 219 | "Shape of X_val: (1309, 13, 50, 3)\n", 220 | "Shape of X_test: (2619, 13, 50, 3)\n", 221 | "\n", 222 | "\n", 223 | "Phone: ER\n", 224 | "Total X shape: (6155, 13, 50, 3)\n", 225 | "Shape of X_train: (4309, 13, 50, 3)\n", 226 | "Shape of X_val: (615, 13, 50, 3)\n", 227 | "Shape of X_test: (1231, 13, 50, 3)\n", 228 | "\n", 229 | "\n", 230 | "Phone: AY\n", 231 | "Total X shape: (2661, 13, 50, 3)\n", 232 | 
"Shape of X_train: (1863, 13, 50, 3)\n", 233 | "Shape of X_val: (266, 13, 50, 3)\n", 234 | "Shape of X_test: (532, 13, 50, 3)\n", 235 | "\n", 236 | "\n", 237 | "Phone: HH\n", 238 | "Total X shape: (1478, 13, 50, 3)\n", 239 | "Shape of X_train: (1035, 13, 50, 3)\n", 240 | "Shape of X_val: (147, 13, 50, 3)\n", 241 | "Shape of X_test: (296, 13, 50, 3)\n", 242 | "\n", 243 | "\n", 244 | "Phone: AO\n", 245 | "Total X shape: (2400, 13, 50, 3)\n", 246 | "Shape of X_train: (1680, 13, 50, 3)\n", 247 | "Shape of X_val: (240, 13, 50, 3)\n", 248 | "Shape of X_test: (480, 13, 50, 3)\n", 249 | "\n", 250 | "\n", 251 | "Phone: NG\n", 252 | "Total X shape: (2154, 13, 50, 3)\n", 253 | "Shape of X_train: (1508, 13, 50, 3)\n", 254 | "Shape of X_val: (215, 13, 50, 3)\n", 255 | "Shape of X_test: (431, 13, 50, 3)\n", 256 | "\n", 257 | "\n", 258 | "Phone: JH\n", 259 | "Total X shape: (1649, 13, 50, 3)\n", 260 | "Shape of X_train: (1155, 13, 50, 3)\n", 261 | "Shape of X_val: (164, 13, 50, 3)\n", 262 | "Shape of X_test: (330, 13, 50, 3)\n", 263 | "\n", 264 | "\n", 265 | "Phone: Z\n", 266 | "Total X shape: (2180, 13, 50, 3)\n", 267 | "Shape of X_train: (1526, 13, 50, 3)\n", 268 | "Shape of X_val: (218, 13, 50, 3)\n", 269 | "Shape of X_test: (436, 13, 50, 3)\n", 270 | "\n", 271 | "\n", 272 | "Phone: UW\n", 273 | "Total X shape: (2213, 13, 50, 3)\n", 274 | "Shape of X_train: (1550, 13, 50, 3)\n", 275 | "Shape of X_val: (221, 13, 50, 3)\n", 276 | "Shape of X_test: (442, 13, 50, 3)\n", 277 | "\n", 278 | "\n", 279 | "Phone: G\n", 280 | "Total X shape: (2486, 13, 50, 3)\n", 281 | "Shape of X_train: (1741, 13, 50, 3)\n", 282 | "Shape of X_val: (248, 13, 50, 3)\n", 283 | "Shape of X_test: (497, 13, 50, 3)\n", 284 | "\n", 285 | "\n", 286 | "Phone: AW\n", 287 | "Total X shape: (739, 13, 50, 3)\n", 288 | "Shape of X_train: (518, 13, 50, 3)\n", 289 | "Shape of X_val: (73, 13, 50, 3)\n", 290 | "Shape of X_test: (148, 13, 50, 3)\n", 291 | "\n", 292 | "\n", 293 | "Phone: ZH\n", 294 | "Total X shape: (190, 
13, 50, 3)\n", 295 | "Shape of X_train: (133, 13, 50, 3)\n", 296 | "Shape of X_val: (19, 13, 50, 3)\n", 297 | "Shape of X_test: (38, 13, 50, 3)\n", 298 | "\n", 299 | "\n", 300 | "Phone: P\n", 301 | "Total X shape: (6113, 13, 50, 3)\n", 302 | "Shape of X_train: (4280, 13, 50, 3)\n", 303 | "Shape of X_val: (611, 13, 50, 3)\n", 304 | "Shape of X_test: (1222, 13, 50, 3)\n", 305 | "\n", 306 | "\n", 307 | "Phone: TH\n", 308 | "Total X shape: (726, 13, 50, 3)\n", 309 | "Shape of X_train: (509, 13, 50, 3)\n", 310 | "Shape of X_val: (72, 13, 50, 3)\n", 311 | "Shape of X_test: (145, 13, 50, 3)\n", 312 | "\n", 313 | "\n", 314 | "Phone: Y\n", 315 | "Total X shape: (1289, 13, 50, 3)\n", 316 | "Shape of X_train: (903, 13, 50, 3)\n", 317 | "Shape of X_val: (128, 13, 50, 3)\n", 318 | "Shape of X_test: (258, 13, 50, 3)\n", 319 | "\n", 320 | "\n", 321 | "Phone: CH\n", 322 | "Total X shape: (1111, 13, 50, 3)\n", 323 | "Shape of X_train: (778, 13, 50, 3)\n", 324 | "Shape of X_val: (111, 13, 50, 3)\n", 325 | "Shape of X_test: (222, 13, 50, 3)\n", 326 | "\n", 327 | "\n", 328 | "Phone: UH\n", 329 | "Total X shape: (556, 13, 50, 3)\n", 330 | "Shape of X_train: (390, 13, 50, 3)\n", 331 | "Shape of X_val: (55, 13, 50, 3)\n", 332 | "Shape of X_test: (111, 13, 50, 3)\n", 333 | "\n", 334 | "\n", 335 | "Phone: OY\n", 336 | "Total X shape: (295, 13, 50, 3)\n", 337 | "Shape of X_train: (207, 13, 50, 3)\n", 338 | "Shape of X_val: (29, 13, 50, 3)\n", 339 | "Shape of X_test: (59, 13, 50, 3)\n", 340 | "\n", 341 | "\n", 342 | "Phone: DH\n", 343 | "Total X shape: (205, 13, 50, 3)\n", 344 | "Shape of X_train: (144, 13, 50, 3)\n", 345 | "Shape of X_val: (20, 13, 50, 3)\n", 346 | "Shape of X_test: (41, 13, 50, 3)\n", 347 | "\n", 348 | "\n" 349 | ] 350 | } 351 | ], 352 | "source": [ 353 | "for phone in phones:\n", 354 | " print(\"Phone: \", phone)\n", 355 | " X = np.load('./CAMBRIDGE_BIGGER/'+phone+'_X.npy', allow_pickle=True)\n", 356 | " np.random.shuffle(X)\n", 357 | " print(\"Total X shape: \", 
X.shape)\n", 358 | " \n", 359 | " X_train = X[:int(np.ceil(X.shape[0]*0.7))]\n", 360 | " X_test = X[int(np.ceil(X.shape[0]*0.7)):int(np.ceil(X.shape[0]*0.9))]\n", 361 | " X_val = X[int(np.ceil(X.shape[0]*0.9)):]\n", 362 | " \n", 363 | " print(\"Shape of X_train: \", X_train.shape)\n", 364 | " print(\"Shape of X_val: \", X_val.shape)\n", 365 | " print(\"Shape of X_test: \", X_test.shape)\n", 366 | "\n", 367 | " np.save(\"./Cambridge_bigger Phones Train/\"+phone+\"_X.npy\",X_train)\n", 368 | " np.save(\"./Cambridge_bigger Phones Dev/\"+phone+\"_X.npy\",X_val)\n", 369 | " np.save(\"./Cambridge_bigger Phones Test/\"+phone+\"_X.npy\",X_test)\n", 370 | " \n", 371 | " print(\"\\n\")" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 2, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "from tqdm import tqdm_notebook" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 15, 386 | "metadata": {}, 387 | "outputs": [ 388 | { 389 | "data": { 390 | "application/vnd.jupyter.widget-view+json": { 391 | "model_id": "a01b36ede3454055933bb68b00c2e3d9", 392 | "version_major": 2, 393 | "version_minor": 0 394 | }, 395 | "text/plain": [ 396 | "HBox(children=(IntProgress(value=0, max=41), HTML(value='')))" 397 | ] 398 | }, 399 | "metadata": {}, 400 | "output_type": "display_data" 401 | }, 402 | { 403 | "name": "stdout", 404 | "output_type": "stream", 405 | "text": [ 406 | "\n" 407 | ] 408 | } 409 | ], 410 | "source": [ 411 | "for phone in tqdm_notebook(phones):\n", 412 | " X_train = np.load(\"./Cambridge_bigger Phones Train/\"+phone+\"_X.npy\",allow_pickle=True)\n", 413 | " Xmax = np.max(X_train,(0,1,2))\n", 414 | " Xmin = np.min(X_train,(0,1,2))\n", 415 | " np.save(\"Cambridge_bigger Train MinMax/\"+\"min_\"+phone+\".npy\", Xmin)\n", 416 | " np.save(\"Cambridge_bigger Train MinMax/\"+\"max_\"+phone+\".npy\", Xmax)" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 5, 422 | "metadata": {}, 423 | 
"outputs": [ 424 | { 425 | "data": { 426 | "application/vnd.jupyter.widget-view+json": { 427 | "model_id": "c41f49067d4f44d5b391b600cc958130", 428 | "version_major": 2, 429 | "version_minor": 0 430 | }, 431 | "text/plain": [ 432 | "HBox(children=(IntProgress(value=0, max=41), HTML(value='')))" 433 | ] 434 | }, 435 | "metadata": {}, 436 | "output_type": "display_data" 437 | }, 438 | { 439 | "name": "stdout", 440 | "output_type": "stream", 441 | "text": [ 442 | "\n" 443 | ] 444 | } 445 | ], 446 | "source": [ 447 | "# For Tanh work\n", 448 | "for phone in tqdm_notebook(phones):\n", 449 | " X_train = np.load(\"./Cambridge_bigger Phones Train/\"+phone+\"_X.npy\",allow_pickle=True)\n", 450 | " Xmax = np.max(np.abs(X_train),(0,1,2))\n", 451 | " np.save(\"Cambridge_bigger Train AbsMax/\"+\"max_\"+phone+\".npy\", Xmax)" 452 | ] 453 | }, 454 | { 455 | "cell_type": "markdown", 456 | "metadata": {}, 457 | "source": [ 458 | "### Training on another notebook" 459 | ] 460 | } 461 | ], 462 | "metadata": { 463 | "kernelspec": { 464 | "display_name": "Python 3", 465 | "language": "python", 466 | "name": "python3" 467 | }, 468 | "language_info": { 469 | "codemirror_mode": { 470 | "name": "ipython", 471 | "version": 3 472 | }, 473 | "file_extension": ".py", 474 | "mimetype": "text/x-python", 475 | "name": "python", 476 | "nbconvert_exporter": "python", 477 | "pygments_lexer": "ipython3", 478 | "version": "3.7.3" 479 | } 480 | }, 481 | "nbformat": 4, 482 | "nbformat_minor": 2 483 | } 484 | -------------------------------------------------------------------------------- /WordLevel_RL_MP_MFCC_TrainL2_TestL2.ipynb: -------------------------------------------------------------------------------- 1 | 
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"WordLevel_RL_MP_MFCC_TrainL2_TestL2.ipynb","version":"0.3.2","provenance":[{"file_id":"1P90PoWrDm5O2YkZmI6GKpf7xa63cf7qD","timestamp":1548422026760}],"collapsed_sections":["pq6rMQPPuFR6","TglRxRiJwJg3","Dhfp1slu8-DC","LQpZ0WnrOBbB","EGyVPsDBRaLv"]},"kernelspec":{"name":"python3","display_name":"Python 3"},"accelerator":"GPU"},"cells":[{"metadata":{"id":"R77w5oYLiAmk","colab_type":"code","outputId":"b673dbb1-5288-494d-ecd3-be0915937567","executionInfo":{"status":"ok","timestamp":1548395258997,"user_tz":-300,"elapsed":720,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["pwd"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["'/content'"]},"metadata":{"tags":[]},"execution_count":3}]},{"metadata":{"colab_type":"code","outputId":"0ae4ec44-11d7-4cce-8b5b-9dab3efd0186","executionInfo":{"status":"ok","timestamp":1548395289330,"user_tz":-300,"elapsed":29064,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"d1eEVQxriiEp","colab":{"base_uri":"https://localhost:8080/","height":121}},"cell_type":"code","source":["from google.colab import drive\n","drive.mount('./drive')"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Go to this URL in a browser: 
https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n","\n","Enter your authorization code:\n","··········\n","Mounted at ./drive\n"],"name":"stdout"}]},{"metadata":{"id":"hC9IB51OznZN","colab_type":"code","outputId":"d6d3c3eb-0ec9-4fb4-8523-ab672b1cd578","executionInfo":{"status":"ok","timestamp":1548395296516,"user_tz":-300,"elapsed":3252,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":168}},"cell_type":"code","source":["!ls './drive/My Drive/Thesis Work'"],"execution_count":0,"outputs":[{"output_type":"stream","text":[" CambridgeMonoPhoneDf.pickle\t jawadmodel2.h5\n"," CambridgeWordFeaturesDf.pickle L2monoPhoneDfTest.pickle\n"," CheckScrapSpeed.ipynb\t\t L2monoPhoneDfTrain.pickle\n"," ConvertToWav.ipynb\t\t L2WordFeaturesDfTest.pickle\n","'Copy of l2arctic_release_v2.0.zip' L2WordFeaturesDfTrain.pickle\n"," Implementation1\t\t ModelFitting.ipynb\n"," Implementation2\t\t MostCommon3000Oxford.txt\n"," Implementation3\t\t UKpronunciations\n"," Implementation4\t\t USpronunciations\n"],"name":"stdout"}]},{"metadata":{"id":"g2Osav3708xy","colab_type":"text"},"cell_type":"markdown","source":["Bism \n","# Imports"]},{"metadata":{"id":"Nz23OfFgoMDo","colab_type":"code","outputId":"eb30630d-e236-44ff-d4dc-5cb14074b1fb","executionInfo":{"status":"ok","timestamp":1548395317185,"user_tz":-300,"elapsed":3278,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["import pandas as pd\n","import numpy as np\n","import os\n","import IPython.display as ipd\n","import librosa\n","import matplotlib\n","import matplotlib.pyplot as plt\n","%matplotlib inline\n","matplotlib.style.use('ggplot')\n","from tqdm import tqdm_notebook\n","\n","\n","#SKLEARN\n","from sklearn.preprocessing import LabelBinarizer\n","import sklearn.metrics as sklm\n","\n","#KERAS\n","from keras import models\n","from keras.models import Model, load_model\n","from keras import callbacks\n","import keras\n","from keras.models import Sequential\n","from keras.layers import Input, Dense, Dropout, Flatten, BatchNormalization\n","from keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Conv2DTranspose, AveragePooling2D\n","from keras.optimizers import SGD, Adam"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Using TensorFlow backend.\n"],"name":"stderr"}]},{"metadata":{"colab_type":"text","id":"yXk_EmxSEvuY"},"cell_type":"markdown","source":["# Implementation 2 (Word Level; Single Phone Mistake)\n","## L2 as Training set\n","## L2 as val+test set (utterances held)"]},{"metadata":{"id":"eTcFZjnwppob","colab_type":"text"},"cell_type":"markdown","source":["### Training"]},{"metadata":{"id":"d8zAPt8mqJ3y","colab_type":"code","outputId":"adda0563-d9d8-4559-98e9-e06832aa7798","executionInfo":{"status":"ok","timestamp":1548246550513,"user_tz":-300,"elapsed":3838,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":151}},"cell_type":"code","source":["!ls ./drive/My\\ Drive/Thesis\\ 
Work/"],"execution_count":0,"outputs":[{"output_type":"stream","text":[" CambridgeMonoPhoneDf.pickle\t L2monoPhoneDfTest.pickle\n"," CambridgeWordFeaturesDf.pickle L2monoPhoneDfTrain.pickle\n"," CheckScrapSpeed.ipynb\t\t L2WordFeaturesDfTest.pickle\n"," ConvertToWav.ipynb\t\t L2WordFeaturesDfTrain.pickle\n","'Copy of l2arctic_release_v2.0.zip' ModelFitting.ipynb\n"," Implementation1\t\t MostCommon3000Oxford.txt\n"," Implementation2\t\t UKpronunciations\n"," jawadmodel2.h5\t\t\t USpronunciations\n"],"name":"stdout"}]},{"metadata":{"id":"5-hpYZx3pui1","colab_type":"text"},"cell_type":"markdown","source":["#### X_TRAIN"]},{"metadata":{"colab_type":"code","id":"1L89mvzfqAlX","colab":{}},"cell_type":"code","source":["train_featureDf = pd.read_pickle(\"./drive/My Drive/Thesis Work/L2WordFeaturesDfTrain.pickle\")\n","train_featureDf['deltaC'] = train_featureDf.MFCC.apply(librosa.feature.delta)\n","train_featureDf['doubleDeltaC'] = train_featureDf.MFCC.apply(librosa.feature.delta, order=2)"],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"code","outputId":"3ff8d9b8-bc43-481c-f16e-1d82541a2fe8","executionInfo":{"status":"ok","timestamp":1548246611727,"user_tz":-300,"elapsed":1233,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"u98yI25ZqAlb","colab":{"base_uri":"https://localhost:8080/","height":383}},"cell_type":"code","source":["train_featureDf.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
wordMFCCmispronouncedPhonescorrectPhonesphoneErrorPercentagedeltaCdoubleDeltaC
0for[[-482.1258206939065, -459.09806101859346, -42...[][F, AO1, R]0.0[[22.44070645615178, 22.44070645615178, 22.440...[[-5.693856662125653, -5.693856662125653, -5.6...
1the[[-383.4356920195703, -380.1235950970762, -373...[][DH, AH1]0.0[[-2.1619768795545644, -2.1619768795545644, -2...[[-2.1235506683236864, -2.1235506683236864, -2...
2twentieth[[-289.88557478827323, -296.3296463780561, -34...[][T, W, EH1, N, T, IY0, IH0, TH]0.0[[-4.634610784712385, -4.634610784712385, -4.6...[[12.767461152369506, 12.767461152369506, 12.7...
3time[[-493.52918905777267, -484.3543551059837, -48...[][T, AY1, M]0.0[[32.19132648271329, 32.19132648271329, 32.191...[[-2.780640526801415, -2.780640526801415, -2.7...
4that[[-462.2926228298184, -460.43184706242124, -46...[][DH, AE1, T]0.0[[3.9526547558469938, 3.9526547558469938, 3.95...[[1.306252970785217, 1.306252970785217, 1.3062...
\n","
"],"text/plain":[" word MFCC \\\n","0 for [[-482.1258206939065, -459.09806101859346, -42... \n","1 the [[-383.4356920195703, -380.1235950970762, -373... \n","2 twentieth [[-289.88557478827323, -296.3296463780561, -34... \n","3 time [[-493.52918905777267, -484.3543551059837, -48... \n","4 that [[-462.2926228298184, -460.43184706242124, -46... \n","\n"," mispronouncedPhones correctPhones phoneErrorPercentage \\\n","0 [] [F, AO1, R] 0.0 \n","1 [] [DH, AH1] 0.0 \n","2 [] [T, W, EH1, N, T, IY0, IH0, TH] 0.0 \n","3 [] [T, AY1, M] 0.0 \n","4 [] [DH, AE1, T] 0.0 \n","\n"," deltaC \\\n","0 [[22.44070645615178, 22.44070645615178, 22.440... \n","1 [[-2.1619768795545644, -2.1619768795545644, -2... \n","2 [[-4.634610784712385, -4.634610784712385, -4.6... \n","3 [[32.19132648271329, 32.19132648271329, 32.191... \n","4 [[3.9526547558469938, 3.9526547558469938, 3.95... \n","\n"," doubleDeltaC \n","0 [[-5.693856662125653, -5.693856662125653, -5.6... \n","1 [[-2.1235506683236864, -2.1235506683236864, -2... \n","2 [[12.767461152369506, 12.767461152369506, 12.7... \n","3 [[-2.780640526801415, -2.780640526801415, -2.7... \n","4 [[1.306252970785217, 1.306252970785217, 1.3062... 
"]},"metadata":{"tags":[]},"execution_count":7}]},{"metadata":{"id":"Ae8QjBLvsSc1","colab_type":"code","colab":{}},"cell_type":"code","source":["train_featureDf = train_featureDf[train_featureDf.phoneErrorPercentage == 0] #only the 100% correct ones for training"],"execution_count":0,"outputs":[]},{"metadata":{"id":"ckk_oYOBst-J","colab_type":"code","outputId":"3ef6b1b0-5cf3-412e-fdf7-87a910331bc0","executionInfo":{"status":"ok","timestamp":1548246800493,"user_tz":-300,"elapsed":1110,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["train_featureDf.word.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(11893,)"]},"metadata":{"tags":[]},"execution_count":15}]},{"metadata":{"colab_type":"code","outputId":"712009af-0495-44ab-bc52-c32fadc7962b","executionInfo":{"status":"ok","timestamp":1548246846317,"user_tz":-300,"elapsed":2715,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"A5XO8dSAqAle","colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["x_train = np.array((train_featureDf.MFCC,train_featureDf.deltaC,train_featureDf.doubleDeltaC))\n","x_train.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(3, 11893, 13, 118)"]},"metadata":{"tags":[]},"execution_count":16}]},{"metadata":{"colab_type":"code","outputId":"31d488fc-5cc9-4e45-c7c0-414d6ac3d7b0","executionInfo":{"status":"ok","timestamp":1548246849853,"user_tz":-300,"elapsed":1151,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"EQc5QsVVqAlj","colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["x_train = np.transpose(x_train,[1,2,3,0])\n","x_train.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(11893, 13, 118, 3)"]},"metadata":{"tags":[]},"execution_count":17}]},{"metadata":{"id":"zU-JYcLdtQ1W","colab_type":"code","colab":{}},"cell_type":"code","source":["train_words = train_featureDf.word\n","train_phones = train_featureDf.correctPhones #all correct"],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"code","id":"UWQtFGCmqAlp","colab":{}},"cell_type":"code","source":["np.save('./drive/My Drive/Thesis Work/Implementation2/x_train.npy',x_train) #Feature vector\n","np.save('./drive/My Drive/Thesis Work/Implementation2/train_words.npy',train_words) #words\n","np.save('./drive/My Drive/Thesis Work/Implementation2/train_phones.npy',train_phones) #phones per word"],"execution_count":0,"outputs":[]},{"metadata":{"id":"pq6rMQPPuFR6","colab_type":"text"},"cell_type":"markdown","source":["#### Y_TRAIN"]},{"metadata":{"colab_type":"code","outputId":"5dbb4175-e584-42a1-a6c6-0d144b51cd48","executionInfo":{"status":"ok","timestamp":1548247372343,"user_tz":-300,"elapsed":1140,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"cVZvm28huJrq","colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["encoder = LabelBinarizer()\n","y_train = encoder.fit_transform(train_words)\n","y_train.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(11893, 
922)"]},"metadata":{"tags":[]},"execution_count":25}]},{"metadata":{"colab_type":"code","id":"lu_gih4RuJrw","colab":{}},"cell_type":"code","source":["np.save('./drive/My Drive/Thesis Work/Implementation2/y_train.npy',y_train) #save y_train"],"execution_count":0,"outputs":[]},{"metadata":{"id":"TglRxRiJwJg3","colab_type":"text"},"cell_type":"markdown","source":["#### MODEL"]},{"metadata":{"colab_type":"code","id":"EE3ELOw6waoE","colab":{}},"cell_type":"code","source":["model = Sequential()\n","# input: 13x118 feature maps with 3 channels (MFCC, delta, double-delta) -> (13, 118, 3) tensors.\n","# this applies 32 convolution filters of size 3x3 each.\n","model.add(Conv2D(32, (3, 3),padding='same', activation='relu', input_shape=(13, 118, 3)))\n","model.add(Conv2D(32, (3, 3),padding='same', activation='relu'))\n","model.add(MaxPooling2D(pool_size=(2, 2)))\n","\n","model.add(Conv2D(64, (3, 3),padding='same', activation='relu'))\n","model.add(Conv2D(64, (3, 3), activation='relu'))\n","model.add(MaxPooling2D(pool_size=(2, 2)))\n","\n","model.add(Flatten())\n","model.add(Dense(64, activation='relu'))\n","model.add(Dense(922, activation='softmax'))\n","\n","callbacklist=[callbacks.History(),callbacks.ModelCheckpoint('./jawadmodel.h5', monitor='loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=20)]\n","model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])\n","\n","model.fit(x_train, y_train, batch_size=5000, epochs=150, callbacks=callbacklist)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"cdt10vwq7QiI","colab_type":"code","outputId":"19c4da96-5c6d-4a0d-f37b-1f8d21e4bbb0","executionInfo":{"status":"ok","timestamp":1548254599509,"user_tz":-300,"elapsed":273624,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":2150}},"cell_type":"code","source":["model.fit(x_train, y_train, batch_size=5000, epochs=60, callbacks=callbacklist)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Epoch 1/60\n","11893/11893 [==============================] - 4s 368us/step - loss: 0.0193 - acc: 0.9994\n","Epoch 2/60\n","11893/11893 [==============================] - 4s 368us/step - loss: 0.0192 - acc: 0.9994\n","Epoch 3/60\n","11893/11893 [==============================] - 5s 390us/step - loss: 0.0191 - acc: 0.9994\n","Epoch 4/60\n","11893/11893 [==============================] - 5s 387us/step - loss: 0.0191 - acc: 0.9994\n","Epoch 5/60\n","11893/11893 [==============================] - 4s 374us/step - loss: 0.0189 - acc: 0.9994\n","Epoch 6/60\n","11893/11893 [==============================] - 5s 387us/step - loss: 0.0188 - acc: 0.9994\n","Epoch 7/60\n","11893/11893 [==============================] - 5s 387us/step - loss: 0.0187 - acc: 0.9994\n","Epoch 8/60\n","11893/11893 [==============================] - 4s 374us/step - loss: 0.0186 - acc: 0.9994\n","Epoch 9/60\n","11893/11893 [==============================] - 4s 371us/step - loss: 0.0185 - acc: 0.9994\n","Epoch 10/60\n","11893/11893 [==============================] - 4s 375us/step - loss: 0.0183 - acc: 0.9994\n","\n","Epoch 00010: loss improved from 0.02087 to 0.01834, saving model to ./jawadmodel.h5\n","Epoch 11/60\n","11893/11893 [==============================] - 4s 367us/step - loss: 0.0182 - acc: 0.9994\n","Epoch 12/60\n","11893/11893 [==============================] - 5s 382us/step - loss: 0.0182 - acc: 0.9994\n","Epoch 13/60\n","11893/11893 [==============================] - 5s 390us/step - loss: 0.0180 - acc: 0.9994\n","Epoch 14/60\n","11893/11893 [==============================] - 5s 390us/step - loss: 0.0179 - 
acc: 0.9994\n","Epoch 15/60\n","11893/11893 [==============================] - 5s 390us/step - loss: 0.0178 - acc: 0.9994\n","Epoch 16/60\n","11893/11893 [==============================] - 5s 387us/step - loss: 0.0177 - acc: 0.9994\n","Epoch 17/60\n","11893/11893 [==============================] - 4s 378us/step - loss: 0.0176 - acc: 0.9994\n","Epoch 18/60\n","11893/11893 [==============================] - 4s 377us/step - loss: 0.0175 - acc: 0.9994\n","Epoch 19/60\n","11893/11893 [==============================] - 4s 374us/step - loss: 0.0174 - acc: 0.9994\n","Epoch 20/60\n","11893/11893 [==============================] - 4s 376us/step - loss: 0.0173 - acc: 0.9994\n","Epoch 21/60\n","11893/11893 [==============================] - 4s 371us/step - loss: 0.0172 - acc: 0.9994\n","Epoch 22/60\n","11893/11893 [==============================] - 5s 388us/step - loss: 0.0171 - acc: 0.9994\n","Epoch 23/60\n","11893/11893 [==============================] - 5s 389us/step - loss: 0.0171 - acc: 0.9994\n","Epoch 24/60\n","11893/11893 [==============================] - 5s 391us/step - loss: 0.0170 - acc: 0.9994\n","Epoch 25/60\n","11893/11893 [==============================] - 5s 390us/step - loss: 0.0169 - acc: 0.9994\n","Epoch 26/60\n","11893/11893 [==============================] - 5s 388us/step - loss: 0.0168 - acc: 0.9994\n","Epoch 27/60\n","11893/11893 [==============================] - 4s 376us/step - loss: 0.0167 - acc: 0.9994\n","Epoch 28/60\n","11893/11893 [==============================] - 4s 376us/step - loss: 0.0167 - acc: 0.9994\n","Epoch 29/60\n","11893/11893 [==============================] - 5s 379us/step - loss: 0.0166 - acc: 0.9994\n","Epoch 30/60\n","11893/11893 [==============================] - 4s 373us/step - loss: 0.0165 - acc: 0.9994\n","\n","Epoch 00030: loss improved from 0.01834 to 0.01650, saving model to ./jawadmodel.h5\n","Epoch 31/60\n","11893/11893 [==============================] - 4s 377us/step - loss: 0.0164 - acc: 0.9994\n","Epoch 
32/60\n","11893/11893 [==============================] - 5s 393us/step - loss: 0.0164 - acc: 0.9994\n","Epoch 33/60\n","11893/11893 [==============================] - 5s 392us/step - loss: 0.0163 - acc: 0.9994\n","Epoch 34/60\n","11893/11893 [==============================] - 5s 389us/step - loss: 0.0163 - acc: 0.9994\n","Epoch 35/60\n","11893/11893 [==============================] - 5s 390us/step - loss: 0.0162 - acc: 0.9994\n","Epoch 36/60\n","11893/11893 [==============================] - 5s 386us/step - loss: 0.0161 - acc: 0.9994\n","Epoch 37/60\n","11893/11893 [==============================] - 4s 375us/step - loss: 0.0161 - acc: 0.9994\n","Epoch 38/60\n","11893/11893 [==============================] - 4s 374us/step - loss: 0.0160 - acc: 0.9994\n","Epoch 39/60\n","11893/11893 [==============================] - 4s 375us/step - loss: 0.0159 - acc: 0.9994\n","Epoch 40/60\n","11893/11893 [==============================] - 4s 373us/step - loss: 0.0159 - acc: 0.9994\n","Epoch 41/60\n","11893/11893 [==============================] - 5s 379us/step - loss: 0.0158 - acc: 0.9994\n","Epoch 42/60\n","11893/11893 [==============================] - 5s 391us/step - loss: 0.0157 - acc: 0.9994\n","Epoch 43/60\n","11893/11893 [==============================] - 5s 391us/step - loss: 0.0156 - acc: 0.9994\n","Epoch 44/60\n","11893/11893 [==============================] - 5s 392us/step - loss: 0.0156 - acc: 0.9994\n","Epoch 45/60\n","11893/11893 [==============================] - 5s 393us/step - loss: 0.0155 - acc: 0.9994\n","Epoch 46/60\n","11893/11893 [==============================] - 5s 381us/step - loss: 0.0155 - acc: 0.9994\n","Epoch 47/60\n","11893/11893 [==============================] - 4s 375us/step - loss: 0.0154 - acc: 0.9994\n","Epoch 48/60\n","11893/11893 [==============================] - 4s 377us/step - loss: 0.0154 - acc: 0.9994\n","Epoch 49/60\n","11893/11893 [==============================] - 4s 372us/step - loss: 0.0153 - acc: 0.9994\n","Epoch 
50/60\n","11893/11893 [==============================] - 4s 375us/step - loss: 0.0153 - acc: 0.9994\n","\n","Epoch 00050: loss improved from 0.01650 to 0.01526, saving model to ./jawadmodel.h5\n","Epoch 51/60\n","11893/11893 [==============================] - 5s 385us/step - loss: 0.0152 - acc: 0.9994\n","Epoch 52/60\n","11893/11893 [==============================] - 5s 391us/step - loss: 0.0151 - acc: 0.9994\n","Epoch 53/60\n","11893/11893 [==============================] - 5s 389us/step - loss: 0.0151 - acc: 0.9994\n","Epoch 54/60\n","11893/11893 [==============================] - 5s 388us/step - loss: 0.0151 - acc: 0.9994\n","Epoch 55/60\n","11893/11893 [==============================] - 5s 386us/step - loss: 0.0150 - acc: 0.9994\n","Epoch 56/60\n","11893/11893 [==============================] - 4s 375us/step - loss: 0.0149 - acc: 0.9994\n","Epoch 57/60\n","11893/11893 [==============================] - 4s 375us/step - loss: 0.0149 - acc: 0.9994\n","Epoch 58/60\n","11893/11893 [==============================] - 4s 376us/step - loss: 0.0148 - acc: 0.9994\n","Epoch 59/60\n","11893/11893 [==============================] - 4s 372us/step - loss: 0.0148 - acc: 0.9994\n","Epoch 60/60\n","11893/11893 [==============================] - 4s 376us/step - loss: 0.0147 - acc: 0.9994\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":34}]},{"metadata":{"colab_type":"code","id":"BAljPK2HwaoM","colab":{}},"cell_type":"code","source":["!cp ./jawadmodel.h5 \"./drive/My Drive/Thesis Work/Implementation2/model.h5\""],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"code","outputId":"9dbdc489-e3cd-4dad-91c5-7ed9d2930efa","executionInfo":{"status":"ok","timestamp":1548255010378,"user_tz":-300,"elapsed":4495,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"0mbcLPoYwaoQ","colab":{"base_uri":"https://localhost:8080/","height":50}},"cell_type":"code","source":["!ls \"./drive/My Drive/Thesis Work/Implementation2/\""],"execution_count":0,"outputs":[{"output_type":"stream","text":["model.h5\t\t test_words.npy\t train_words.npy x_train.npy\n","test_phoneErrorPercentage.npy train_phones.npy x_test.npy\t y_train.npy\n"],"name":"stdout"}]},{"metadata":{"id":"cnZALbMRxjCo","colab_type":"text"},"cell_type":"markdown","source":["Training completed. Final training accuracy 99.94% (acc: 0.9994) and loss 0.0147."]},{"metadata":{"id":"Jl6srPbI8p3K","colab_type":"text"},"cell_type":"markdown","source":["### TESTING"]},{"metadata":{"id":"Dhfp1slu8-DC","colab_type":"text"},"cell_type":"markdown","source":["#### Representation Layer, L2 norm, Prediction Checks"]},{"metadata":{"colab_type":"code","outputId":"842800cc-6f25-4996-ab44-11b1e58e522a","executionInfo":{"status":"ok","timestamp":1548255276553,"user_tz":-300,"elapsed":4354,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"DrcuQZ3w9U3f","colab":{"base_uri":"https://localhost:8080/","height":50}},"cell_type":"code","source":["!ls './drive/My Drive/Thesis Work/WordLevel_RL_MP_MFCC_TrainL2_TestL2'"],"execution_count":0,"outputs":[{"output_type":"stream","text":["model.h5\t\t test_words.npy\t train_words.npy x_train.npy\n","test_phoneErrorPercentage.npy train_phones.npy x_test.npy\t y_train.npy\n"],"name":"stdout"}]},{"metadata":{"colab_type":"code","id":"17zxKitN9U3j","colab":{}},"cell_type":"code","source":["model = load_model('./drive/My Drive/Thesis 
Work/WordLevel_RL_MP_MFCC_TrainL2_TestL2/model.h5')"],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"code","id":"B4NgBdA09U3o","colab":{}},"cell_type":"code","source":["x_train = np.load(\"./drive/My Drive/Thesis Work/WordLevel_RL_MP_MFCC_TrainL2_TestL2/x_train.npy\")\n","y_train = np.load(\"./drive/My Drive/Thesis Work/WordLevel_RL_MP_MFCC_TrainL2_TestL2/y_train.npy\")"],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"code","id":"QKx2H_iN9U3s","colab":{}},"cell_type":"code","source":["train_words = np.load(\"./drive/My Drive/Thesis Work/WordLevel_RL_MP_MFCC_TrainL2_TestL2/train_words.npy\") #vocabulary"],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"code","outputId":"363bcdba-6d44-431f-c4f5-ba152160be0a","executionInfo":{"status":"ok","timestamp":1548255339649,"user_tz":-300,"elapsed":1238,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"RB5evuIS9U3v","colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["encoder = LabelBinarizer()\n","encoder.fit(train_words)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)"]},"metadata":{"tags":[]},"execution_count":41}]},{"metadata":{"colab_type":"code","outputId":"0c1c4a29-bb44-4da2-b17f-65661c2b6231","executionInfo":{"status":"ok","timestamp":1548255360929,"user_tz":-300,"elapsed":1122,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"0HSO0Ot39U3y","colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["encoder.inverse_transform(model.predict(x_train[:4])) #predicting 
correctly"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['for', 'the', 'twentieth', 'time'], dtype='"]},"metadata":{"tags":[]},"execution_count":45}]},{"metadata":{"colab_type":"code","id":"nsgtvOaV9U4H","colab":{}},"cell_type":"code","source":["intermediate_layer_model = Model(inputs=model.input,\n"," outputs=model.get_layer(index=-2).output)"],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"code","outputId":"64c9ea11-07b9-456a-c67a-c9d62a92ec0e","executionInfo":{"status":"ok","timestamp":1548255483912,"user_tz":-300,"elapsed":1162,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"WeopqwQz9U4K","colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["np.linalg.norm(intermediate_layer_model.predict(x_train[1:2]) - intermediate_layer_model.predict(x_train[1:2])) #l2 norm"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.0"]},"metadata":{"tags":[]},"execution_count":47}]},{"metadata":{"id":"LQpZ0WnrOBbB","colab_type":"text"},"cell_type":"markdown","source":["#### Loading Train-Test Arrays"]},{"metadata":{"id":"g6fQ3xB2OWbx","colab_type":"code","colab":{}},"cell_type":"code","source":["x_test = np.load('./drive/My Drive/Thesis Work/Implementation2/x_test.npy')\n","test_words = np.load('./drive/My Drive/Thesis Work/Implementation2/test_words.npy')\n","test_phoneErrorPercentage = np.load('./drive/My Drive/Thesis Work/Implementation2/test_phoneErrorPercentage.npy')\n","test_mispronouncedPhones = np.load('./drive/My Drive/Thesis Work/Implementation2/test_mispronouncedPhones.npy')\n","test_correctPhones = np.load('./drive/My Drive/Thesis 
Work/Implementation2/test_correctPhones.npy')"],"execution_count":0,"outputs":[]},{"metadata":{"id":"uTZZOraAgyM0","colab_type":"code","outputId":"4036f80d-5815-4bb1-f403-05fc438beb5b","executionInfo":{"status":"ok","timestamp":1548256055978,"user_tz":-300,"elapsed":1234,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["x_test.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(9140, 13, 118, 3)"]},"metadata":{"tags":[]},"execution_count":55}]},{"metadata":{"id":"GxsFy67PR1rM","colab_type":"code","colab":{}},"cell_type":"code","source":["x_train = np.load(\"./drive/My Drive/Thesis Work/Implementation2/x_train.npy\")\n","y_train = np.load(\"./drive/My Drive/Thesis Work/Implementation2/y_train.npy\")\n","train_words = np.load(\"./drive/My Drive/Thesis Work/Implementation2/train_words.npy\") #vocabulary\n","model = load_model('./drive/My Drive/Thesis Work/Implementation2/model.h5')"],"execution_count":0,"outputs":[]},{"metadata":{"id":"EGyVPsDBRaLv","colab_type":"text"},"cell_type":"markdown","source":["#### Validation and Testing"]},{"metadata":{"id":"rdvGVUYByG1J","colab_type":"code","outputId":"1c03c1b3-2408-4ef4-9f67-efb7ec3a563c","executionInfo":{"status":"ok","timestamp":1548256641692,"user_tz":-300,"elapsed":1024,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["model.layers[-2]"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":59}]},{"metadata":{"id":"yml5Ni1Uxxq5","colab_type":"code","colab":{}},"cell_type":"code","source":["intermediate_layer_model = Model(inputs=model.input,\n"," outputs=model.get_layer(index=-2).output)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"e3uN8DUXyk5O","colab_type":"code","outputId":"982ca2ad-e006-4f2f-b40e-cbb4d1991c21","executionInfo":{"status":"ok","timestamp":1548256727658,"user_tz":-300,"elapsed":838,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["#Number of Words not present in Train Set --> These will be skipped in validation and testing\n","len(set(test_words)) - len(set(test_words).intersection(set(train_words)))"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["113"]},"metadata":{"tags":[]},"execution_count":61}]},{"metadata":{"id":"5VpUO_ZOVKE3","colab_type":"code","outputId":"a819d187-814c-423e-8e98-d97ba194f609","executionInfo":{"status":"ok","timestamp":1548257377536,"user_tz":-300,"elapsed":837,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["len(set(test_words))"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["262"]},"metadata":{"tags":[]},"execution_count":72}]},{"metadata":{"id":"86XnQFChVMnL","colab_type":"text"},"cell_type":"markdown","source":["Nearly half of the distinct test words (113 of 262, about 43%) are not present in the training set!"]},{"metadata":{"id":"wgeCw3mayqTL","colab_type":"code","outputId":"47fa8593-6028-4486-d370-21b3d5541c87","executionInfo":{"status":"ok","timestamp":1548256744574,"user_tz":-300,"elapsed":820,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["encoder = LabelBinarizer()\n","encoder.fit(train_words)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)"]},"metadata":{"tags":[]},"execution_count":62}]},{"metadata":{"id":"xLlaeHgD0YYQ","colab_type":"code","outputId":"ada90b40-d646-454b-9220-c0a1a1772368","executionInfo":{"status":"ok","timestamp":1548256771228,"user_tz":-300,"elapsed":793,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["len(test_words)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["9140"]},"metadata":{"tags":[]},"execution_count":63}]},{"metadata":{"id":"Mnjp3FLe3Rjb","colab_type":"code","colab":{}},"cell_type":"code","source":["train_representations = 
intermediate_layer_model.predict(x_train)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"8QPm7fzW3s_6","colab_type":"code","outputId":"c7663f4a-d448-49ce-8eb6-ea619c5e8f50","executionInfo":{"status":"ok","timestamp":1548256783529,"user_tz":-300,"elapsed":1517,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["train_representations.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(11893, 64)"]},"metadata":{"tags":[]},"execution_count":65}]},{"metadata":{"id":"LNwK8i5u0QwM","colab_type":"code","outputId":"2e381b0a-d8da-4478-d5b9-fda815c58960","executionInfo":{"status":"ok","timestamp":1548257455682,"user_tz":-300,"elapsed":18292,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":121}},"cell_type":"code","source":["#VALIDATION\n","\n","valSetLength = 1000 #initial some from test\n","tolerancePercentage = 40\n","\n","truelabels = [1 if test_phoneErrorPercentage[i] <= tolerancePercentage else 0 for i in range(valSetLength) if test_words[i] in train_words]\n","\n","accs = []\n","f1mis = []\n","for threshold in [150,160,170,180,190,200,210,220,250]:\n"," predictedLabels = []\n"," for i in range(valSetLength): #iterating on test words\n"," if test_words[i] in train_words:\n"," '''print(i)\n"," print(test_words[i])\n"," print(test_phoneErrorPercentage[i])\n"," print(np.mean(np.linalg.norm(train_representations[np.argwhere(train_words == test_words[i]).reshape(-1,)]\\\n"," - intermediate_layer_model.predict(x_test[i:i+1]),axis=1)))'''\n"," if np.mean(np.linalg.norm(train_representations[np.argwhere(train_words == 
test_words[i]).reshape(-1,)]\\\n"," - intermediate_layer_model.predict(x_test[i:i+1]),axis=1)) <= threshold:\n"," predictedLabels.append(1)\n"," else: \n"," predictedLabels.append(0)\n","\n"," accs.append(sklm.accuracy_score(truelabels,predictedLabels))\n"," f1mis.append(sklm.precision_recall_fscore_support(truelabels,predictedLabels)[2][0])\n","\n","print(\"Threshold: \", np.argmax(f1mis))\n","print(\"Max Achievable Accuracy: \", accs[np.argmax(f1mis)])\n","print(\"Max Achievable F-1 score for Mispronunciations: \", max(f1mis))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/sklearn/metrics/classification.py:1143: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n"," 'precision', 'predicted', average, warn_for)\n"],"name":"stderr"},{"output_type":"stream","text":["Threshold: 6\n","Max Achievable Accuracy: 0.14496644295302014\n","Max Achievable F-1 score for Mispronunciations: 0.24615384615384614\n"],"name":"stdout"}]},{"metadata":{"id":"j6Jh2RbeU-xi","colab_type":"code","outputId":"e8dc25a6-d4be-4c9b-c996-30d2279385ba","executionInfo":{"status":"ok","timestamp":1548257549333,"user_tz":-300,"elapsed":828,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"colab":{"base_uri":"https://localhost:8080/","height":84}},"cell_type":"code","source":["sklm.precision_recall_fscore_support(truelabels,predictedLabels) #0 - 1"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(array([0.14 , 0.86666667]),\n"," array([0.94230769, 0.06084243]),\n"," array([0.24378109, 0.11370262]),\n"," array([104, 
641]))"]},"metadata":{"tags":[]},"execution_count":77}]},{"metadata":{"id":"5hyoqpH1z4J6","colab_type":"text"},"cell_type":"markdown","source":["**Testing**"]},{"metadata":{"id":"ObGZnrJyz3zJ","colab_type":"code","colab":{}},"cell_type":"code","source":["valSetLength = 1000 #initial some from test\n","tolerancePercentage = 40\n","threshold = 210\n","truelabels = [1 if test_phoneErrorPercentage[i] <= tolerancePercentage else 0 for i in range(valSetLength,test_words.shape[0]) if test_words[i] in train_words]\n","\n","predictedLabels = []\n","for i in range(valSetLength,test_words.shape[0]): #iterating on test words\n"," if test_words[i] in train_words:\n"," '''print(i)\n"," print(test_words[i])\n"," print(test_phoneErrorPercentage[i])\n"," print(np.mean(np.linalg.norm(train_representations[np.argwhere(train_words == test_words[i]).reshape(-1,)]\\\n"," - intermediate_layer_model.predict(x_test[i:i+1]),axis=1)))'''\n"," if np.mean(np.linalg.norm(train_representations[np.argwhere(train_words == test_words[i]).reshape(-1,)]\\\n"," - intermediate_layer_model.predict(x_test[i:i+1]),axis=1)) <= threshold:\n"," predictedLabels.append(1)\n"," else: \n"," predictedLabels.append(0)"],"execution_count":0,"outputs":[]},{"metadata":{"colab_type":"code","outputId":"2ae618c0-519c-4a94-d996-4d475879a05f","executionInfo":{"status":"ok","timestamp":1548257711310,"user_tz":-300,"elapsed":985,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"ioPnCpdvWYRk","colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["print('Accuracy = ', sklm.accuracy_score(truelabels,predictedLabels))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Accuracy = 
0.13755135579293346\n"],"name":"stdout"}]},{"metadata":{"colab_type":"code","outputId":"f8836c25-a385-4f18-f488-daedde46b2fb","executionInfo":{"status":"ok","timestamp":1548257713916,"user_tz":-300,"elapsed":836,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"XkZRHkBqWYRn","colab":{"base_uri":"https://localhost:8080/","height":84}},"cell_type":"code","source":["sklm.precision_recall_fscore_support(truelabels,predictedLabels) #0 - 1"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(array([0.13305785, 0.91428571]),\n"," array([0.99628713, 0.00606405]),\n"," array([0.23476232, 0.01204819]),\n"," array([ 808, 5277]))"]},"metadata":{"tags":[]},"execution_count":81}]},{"metadata":{"colab_type":"code","outputId":"d2f66044-3956-4750-e726-e77831fec66d","executionInfo":{"status":"ok","timestamp":1548257723088,"user_tz":-300,"elapsed":805,"user":{"displayName":"Jawad Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"Gpc5QCT-WYRs","colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["sklm.precision_recall_fscore_support(truelabels,predictedLabels)[2][0] #0"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.2347623213764946"]},"metadata":{"tags":[]},"execution_count":82}]},{"metadata":{"colab_type":"code","outputId":"6b395897-33fe-4355-e8da-f1d59e2f2b00","executionInfo":{"status":"ok","timestamp":1548257729860,"user_tz":-300,"elapsed":794,"user":{"displayName":"Jawad 
Arshad","photoUrl":"https://lh5.googleusercontent.com/-4aOV1ZG2uVk/AAAAAAAAAAI/AAAAAAAAABw/DsyyNnGP7SM/s64/photo.jpg","userId":"13741434010255651176"}},"id":"gWOCe6UgWYRw","colab":{"base_uri":"https://localhost:8080/","height":67}},"cell_type":"code","source":["sklm.roc_curve(truelabels,predictedLabels)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(array([0. , 0.00371287, 1. ]),\n"," array([0. , 0.00606405, 1. ]),\n"," array([2, 1, 0]))"]},"metadata":{"tags":[]},"execution_count":83}]},{"metadata":{"colab_type":"text","id":"qD2JWRWaWYRz"},"cell_type":"markdown","source":["*Results cannot be compared directly with prior work:* \n","The Interspeech work uses a **different dataset** and reports results at the phone level (every phone is a separate class there).\n","\n","The L2-ARCTIC work reports no results.\n","\n","This is the first work to report results on this dataset."]},{"metadata":{"id":"aL5lNZ9-b0HW","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JawadAr/Pronunciation-verification-using-anomaly-detection-Thesis/f44306de460eb460434c6b4b5efa032723b45b64/download.png --------------------------------------------------------------------------------