├── AudioCompression_using_AutoEncoder_Machine_Learning.ipynb └── README.md /AudioCompression_using_AutoEncoder_Machine_Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "WARNING: Logging before flag parsing goes to stderr.\n", 13 | "W0113 00:53:51.602934 33696 deprecation.py:323] From :176: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n", 14 | "Instructions for updating:\n", 15 | "Use keras.layers.dense instead.\n" 16 | ] 17 | }, 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "I GOT HERE\n" 23 | ] 24 | }, 25 | { 26 | "name": "stderr", 27 | "output_type": "stream", 28 | "text": [ 29 | "W0113 00:54:11.267351 33696 deprecation.py:323] From c:\\anaconda3\\lib\\site-packages\\tensorflow\\python\\training\\saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\n", 30 | "Instructions for updating:\n", 31 | "Use standard file APIs to check for files with this prefix.\n" 32 | ] 33 | }, 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "4 5\n", 39 | "See this: F:\\tfaudio\\audiowav\\r (15).wav\n", 40 | "audio shape (5314197, 2)\n", 41 | "Returning File: F:\\tfaudio\\audiowav\\r (15).wav\n", 42 | "Epoch: 0\n", 43 | "totalsongs(1, 1062, 10000)\n", 44 | "Song loss: 2897.7048\n", 45 | "Curr Epoch: 0 Curr Batch: 0/1\n", 46 | "Batch Loss: 2897.7048\n", 47 | "Epoch Avg Loss: 2897.7048\n", 48 | "Epoch: 1\n", 49 | "totalsongs(1, 1062, 10000)\n", 50 | "Song loss: 2868.833\n", 51 | "Curr Epoch: 1 Curr Batch: 0/1\n", 52 | "Batch Loss: 2868.833\n", 53 | "Epoch Avg Loss: 2868.833\n", 54 | "Epoch: 2\n", 55 | "totalsongs(1, 1062, 10000)\n", 56 | "Song loss: 2834.8965\n", 57 | "Curr Epoch: 2 Curr Batch: 0/1\n", 58 | "Batch Loss: 2834.8965\n", 59 | "Epoch Avg Loss: 2834.8965\n", 60 | "Epoch: 3\n", 61 | "totalsongs(1, 1062, 10000)\n", 62 | "Song loss: 2814.4148\n", 63 | "Curr Epoch: 3 Curr Batch: 0/1\n", 64 | "Batch Loss: 2814.4148\n", 65 | "Epoch Avg Loss: 2814.4148\n", 66 | "Epoch: 4\n", 67 | "totalsongs(1, 1062, 10000)\n", 68 | "Song loss: 2808.1172\n", 69 | "Curr Epoch: 4 Curr Batch: 0/1\n", 70 | "Batch Loss: 2808.1172\n", 71 | "Epoch Avg Loss: 2808.1172\n", 72 | "Epoch: 5\n", 73 | "totalsongs(1, 1062, 10000)\n", 74 | "Song loss: 2805.7178\n", 75 | "Curr Epoch: 5 Curr Batch: 0/1\n", 76 | "Batch Loss: 2805.7178\n", 77 | "Epoch Avg Loss: 2805.7178\n", 78 | "Epoch: 6\n", 79 | "totalsongs(1, 1062, 10000)\n", 80 | "Song loss: 2800.1287\n", 81 | "Curr Epoch: 6 Curr Batch: 0/1\n", 82 | "Batch Loss: 2800.1287\n", 83 | "Epoch Avg Loss: 2800.1287\n", 84 | "Epoch: 7\n", 85 | "totalsongs(1, 1062, 10000)\n", 86 | "Song loss: 2791.9536\n", 87 | "Curr Epoch: 7 Curr Batch: 0/1\n", 88 | "Batch Loss: 2791.9536\n", 89 | "Epoch Avg Loss: 2791.9536\n", 90 | "Epoch: 8\n", 91 | "totalsongs(1, 1062, 10000)\n", 92 | "Song loss: 2785.9773\n", 93 | "Curr Epoch: 8 Curr Batch: 0/1\n", 94 | "Batch Loss: 2785.9773\n", 95 | "Epoch Avg Loss: 2785.9773\n", 96 | "Epoch: 9\n", 97 | "totalsongs(1, 1062, 10000)\n", 98 | "Song loss: 2784.0908\n", 99 | "Curr Epoch: 9 Curr Batch: 0/1\n", 100 | "Batch Loss: 2784.0908\n", 101 | "Epoch Avg Loss: 2784.0908\n", 102 | "Epoch: 10\n", 103 | "totalsongs(1, 1062, 10000)\n", 
104 | "Song loss: 2783.079\n", 105 | "Curr Epoch: 10 Curr Batch: 0/1\n", 106 | "Batch Loss: 2783.079\n", 107 | "Epoch Avg Loss: 2783.079\n", 108 | "Epoch: 11\n", 109 | "totalsongs(1, 1062, 10000)\n", 110 | "Song loss: 2779.0698\n", 111 | "Curr Epoch: 11 Curr Batch: 0/1\n", 112 | "Batch Loss: 2779.0698\n", 113 | "Epoch Avg Loss: 2779.0698\n", 114 | "Epoch: 12\n", 115 | "totalsongs(1, 1062, 10000)\n", 116 | "Song loss: 2772.0159\n", 117 | "Curr Epoch: 12 Curr Batch: 0/1\n", 118 | "Batch Loss: 2772.0159\n", 119 | "Epoch Avg Loss: 2772.0159\n", 120 | "Epoch: 13\n", 121 | "totalsongs(1, 1062, 10000)\n", 122 | "Song loss: 2764.721\n", 123 | "Curr Epoch: 13 Curr Batch: 0/1\n", 124 | "Batch Loss: 2764.721\n", 125 | "Epoch Avg Loss: 2764.721\n", 126 | "Epoch: 14\n", 127 | "totalsongs(1, 1062, 10000)\n", 128 | "Song loss: 2759.0554\n", 129 | "Curr Epoch: 14 Curr Batch: 0/1\n", 130 | "Batch Loss: 2759.0554\n", 131 | "Epoch Avg Loss: 2759.0554\n", 132 | "Epoch: 15\n", 133 | "totalsongs(1, 1062, 10000)\n", 134 | "Song loss: 2754.6016\n", 135 | "Curr Epoch: 15 Curr Batch: 0/1\n", 136 | "Batch Loss: 2754.6016\n", 137 | "Epoch Avg Loss: 2754.6016\n", 138 | "Epoch: 16\n", 139 | "totalsongs(1, 1062, 10000)\n", 140 | "Song loss: 2750.2036\n", 141 | "Curr Epoch: 16 Curr Batch: 0/1\n", 142 | "Batch Loss: 2750.2036\n", 143 | "Epoch Avg Loss: 2750.2036\n", 144 | "Epoch: 17\n", 145 | "totalsongs(1, 1062, 10000)\n", 146 | "Song loss: 2745.731\n", 147 | "Curr Epoch: 17 Curr Batch: 0/1\n", 148 | "Batch Loss: 2745.731\n", 149 | "Epoch Avg Loss: 2745.731\n", 150 | "Epoch: 18\n", 151 | "totalsongs(1, 1062, 10000)\n", 152 | "Song loss: 2741.9744\n", 153 | "Curr Epoch: 18 Curr Batch: 0/1\n", 154 | "Batch Loss: 2741.9744\n", 155 | "Epoch Avg Loss: 2741.9744\n", 156 | "Epoch: 19\n", 157 | "totalsongs(1, 1062, 10000)\n", 158 | "Song loss: 2739.3975\n", 159 | "Curr Epoch: 19 Curr Batch: 0/1\n", 160 | "Batch Loss: 2739.3975\n", 161 | "Epoch Avg Loss: 2739.3975\n", 162 | "Epoch: 20\n", 163 | "totalsongs(1, 1062, 10000)\n", 164 | "Song loss: 2737.3164\n", 165 | "Curr Epoch: 20 Curr Batch: 0/1\n", 166 | "Batch Loss: 2737.3164\n", 167 | "Epoch Avg Loss: 2737.3164\n", 168 | "Epoch: 21\n", 169 | "totalsongs(1, 1062, 10000)\n", 170 | "Song loss: 2734.51\n", 171 | "Curr Epoch: 21 Curr Batch: 0/1\n", 172 | "Batch Loss: 2734.51\n", 173 | "Epoch Avg Loss: 2734.51\n", 174 | "Epoch: 22\n", 175 | "totalsongs(1, 1062, 10000)\n", 176 | "Song loss: 2730.5425\n", 177 | "Curr Epoch: 22 Curr Batch: 0/1\n", 178 | "Batch Loss: 2730.5425\n", 179 | "Epoch Avg Loss: 2730.5425\n", 180 | "Epoch: 23\n", 181 | "totalsongs(1, 1062, 10000)\n", 182 | "Song loss: 2726.2256\n", 183 | "Curr Epoch: 23 Curr Batch: 0/1\n", 184 | "Batch Loss: 2726.2256\n", 185 | "Epoch Avg Loss: 2726.2256\n", 186 | "Epoch: 24\n", 187 | "totalsongs(1, 1062, 10000)\n", 188 | "Song loss: 2722.7183\n", 189 | "Curr Epoch: 24 Curr Batch: 0/1\n", 190 | "Batch Loss: 2722.7183\n", 191 | "Epoch Avg Loss: 2722.7183\n", 192 | "Epoch: 25\n", 193 | "totalsongs(1, 1062, 10000)\n", 194 | "Song loss: 2720.3591\n", 195 | "Curr Epoch: 25 Curr Batch: 0/1\n", 196 | "Batch Loss: 2720.3591\n", 197 | "Epoch Avg Loss: 2720.3591\n", 198 | "Epoch: 26\n", 199 | "totalsongs(1, 1062, 10000)\n", 200 | "Song loss: 2718.4792\n", 201 | "Curr Epoch: 26 Curr Batch: 0/1\n", 202 | "Batch Loss: 2718.4792\n", 203 | "Epoch Avg Loss: 2718.4792\n", 204 | "Epoch: 27\n", 205 | "totalsongs(1, 1062, 10000)\n", 206 | "Song loss: 2716.2288\n", 207 | "Curr Epoch: 27 Curr Batch: 0/1\n", 208 | "Batch Loss: 2716.2288\n", 209 | 
"Epoch Avg Loss: 2716.2288\n", 210 | "Epoch: 28\n", 211 | "totalsongs(1, 1062, 10000)\n", 212 | "Song loss: 2713.4785\n", 213 | "Curr Epoch: 28 Curr Batch: 0/1\n", 214 | "Batch Loss: 2713.4785\n", 215 | "Epoch Avg Loss: 2713.4785\n", 216 | "Epoch: 29\n", 217 | "totalsongs(1, 1062, 10000)\n", 218 | "Song loss: 2710.7495\n", 219 | "Curr Epoch: 29 Curr Batch: 0/1\n", 220 | "Batch Loss: 2710.7495\n", 221 | "Epoch Avg Loss: 2710.7495\n", 222 | "Epoch: 30\n", 223 | "totalsongs(1, 1062, 10000)\n", 224 | "Song loss: 2708.5298\n", 225 | "Curr Epoch: 30 Curr Batch: 0/1\n", 226 | "Batch Loss: 2708.5298\n", 227 | "Epoch Avg Loss: 2708.5298\n", 228 | "Epoch: 31\n", 229 | "totalsongs(1, 1062, 10000)\n", 230 | "Song loss: 2706.7776\n", 231 | "Curr Epoch: 31 Curr Batch: 0/1\n", 232 | "Batch Loss: 2706.7776\n", 233 | "Epoch Avg Loss: 2706.7776\n", 234 | "Epoch: 32\n", 235 | "totalsongs(1, 1062, 10000)\n", 236 | "Song loss: 2705.042\n", 237 | "Curr Epoch: 32 Curr Batch: 0/1\n", 238 | "Batch Loss: 2705.042\n", 239 | "Epoch Avg Loss: 2705.042\n", 240 | "Epoch: 33\n", 241 | "totalsongs(1, 1062, 10000)\n", 242 | "Song loss: 2702.984\n", 243 | "Curr Epoch: 33 Curr Batch: 0/1\n", 244 | "Batch Loss: 2702.984\n", 245 | "Epoch Avg Loss: 2702.984\n", 246 | "Epoch: 34\n", 247 | "totalsongs(1, 1062, 10000)\n", 248 | "Song loss: 2700.694\n", 249 | "Curr Epoch: 34 Curr Batch: 0/1\n", 250 | "Batch Loss: 2700.694\n", 251 | "Epoch Avg Loss: 2700.694\n", 252 | "Epoch: 35\n", 253 | "totalsongs(1, 1062, 10000)\n", 254 | "Song loss: 2698.5034\n", 255 | "Curr Epoch: 35 Curr Batch: 0/1\n", 256 | "Batch Loss: 2698.5034\n", 257 | "Epoch Avg Loss: 2698.5034\n", 258 | "Epoch: 36\n", 259 | "totalsongs(1, 1062, 10000)\n", 260 | "Song loss: 2696.6104\n", 261 | "Curr Epoch: 36 Curr Batch: 0/1\n", 262 | "Batch Loss: 2696.6104\n", 263 | "Epoch Avg Loss: 2696.6104\n", 264 | "Epoch: 37\n", 265 | "totalsongs(1, 1062, 10000)\n", 266 | "Song loss: 2694.9312\n", 267 | "Curr Epoch: 37 Curr Batch: 0/1\n", 268 | "Batch Loss: 2694.9312\n", 269 | "Epoch Avg Loss: 2694.9312\n", 270 | "Epoch: 38\n", 271 | "totalsongs(1, 1062, 10000)\n", 272 | "Song loss: 2693.271\n", 273 | "Curr Epoch: 38 Curr Batch: 0/1\n", 274 | "Batch Loss: 2693.271\n", 275 | "Epoch Avg Loss: 2693.271\n", 276 | "Epoch: 39\n", 277 | "totalsongs(1, 1062, 10000)\n", 278 | "Song loss: 2691.544\n", 279 | "Curr Epoch: 39 Curr Batch: 0/1\n", 280 | "Batch Loss: 2691.544\n", 281 | "Epoch Avg Loss: 2691.544\n", 282 | "Epoch: 40\n", 283 | "totalsongs(1, 1062, 10000)\n", 284 | "Song loss: 2689.8262\n", 285 | "Curr Epoch: 40 Curr Batch: 0/1\n", 286 | "Batch Loss: 2689.8262\n", 287 | "Epoch Avg Loss: 2689.8262\n", 288 | "Epoch: 41\n", 289 | "totalsongs(1, 1062, 10000)\n", 290 | "Song loss: 2688.2156\n", 291 | "Curr Epoch: 41 Curr Batch: 0/1\n", 292 | "Batch Loss: 2688.2156\n", 293 | "Epoch Avg Loss: 2688.2156\n", 294 | "Epoch: 42\n", 295 | "totalsongs(1, 1062, 10000)\n", 296 | "Song loss: 2686.7153\n", 297 | "Curr Epoch: 42 Curr Batch: 0/1\n", 298 | "Batch Loss: 2686.7153\n", 299 | "Epoch Avg Loss: 2686.7153\n", 300 | "Epoch: 43\n", 301 | "totalsongs(1, 1062, 10000)\n", 302 | "Song loss: 2685.2532\n", 303 | "Curr Epoch: 43 Curr Batch: 0/1\n", 304 | "Batch Loss: 2685.2532\n", 305 | "Epoch Avg Loss: 2685.2532\n", 306 | "Epoch: 44\n", 307 | "totalsongs(1, 1062, 10000)\n", 308 | "Song loss: 2683.7788\n", 309 | "Curr Epoch: 44 Curr Batch: 0/1\n", 310 | "Batch Loss: 2683.7788\n", 311 | "Epoch Avg Loss: 2683.7788\n", 312 | "Epoch: 45\n", 313 | "totalsongs(1, 1062, 10000)\n", 314 | "Song loss: 
2682.3123\n", 315 | "Curr Epoch: 45 Curr Batch: 0/1\n", 316 | "Batch Loss: 2682.3123\n", 317 | "Epoch Avg Loss: 2682.3123\n", 318 | "Epoch: 46\n", 319 | "totalsongs(1, 1062, 10000)\n", 320 | "Song loss: 2680.8975\n", 321 | "Curr Epoch: 46 Curr Batch: 0/1\n", 322 | "Batch Loss: 2680.8975\n", 323 | "Epoch Avg Loss: 2680.8975\n", 324 | "Epoch: 47\n", 325 | "totalsongs(1, 1062, 10000)\n", 326 | "Song loss: 2679.558\n", 327 | "Curr Epoch: 47 Curr Batch: 0/1\n", 328 | "Batch Loss: 2679.558\n", 329 | "Epoch Avg Loss: 2679.558\n", 330 | "Epoch: 48\n", 331 | "totalsongs(1, 1062, 10000)\n", 332 | "Song loss: 2678.2349\n", 333 | "Curr Epoch: 48 Curr Batch: 0/1\n", 334 | "Batch Loss: 2678.2349\n", 335 | "Epoch Avg Loss: 2678.2349\n", 336 | "Epoch: 49\n", 337 | "totalsongs(1, 1062, 10000)\n", 338 | "Song loss: 2676.9092\n", 339 | "Curr Epoch: 49 Curr Batch: 0/1\n", 340 | "Batch Loss: 2676.9092\n", 341 | "Epoch Avg Loss: 2676.9092\n", 342 | "Epoch: 50\n", 343 | "totalsongs(1, 1062, 10000)\n", 344 | "Song loss: 2675.6008\n", 345 | "Curr Epoch: 50 Curr Batch: 0/1\n", 346 | "Batch Loss: 2675.6008\n", 347 | "Epoch Avg Loss: 2675.6008\n", 348 | "Sample rate: 44100\n", 349 | "encodedshape(1062, 1800)\n", 350 | "Output: [[ 7.3477707e+01 6.4577866e+01 1.0814214e+01 ... 2.3098246e+04\n", 351 | " 1.1645315e+04 -9.3722504e+02]\n", 352 | " [-8.3661152e+03 2.1774602e+03 7.2459775e+03 ... -1.6125327e+02\n", 353 | " 1.0913843e+03 4.0661896e+02]\n", 354 | " [-6.2134350e+01 -1.4534984e+03 7.1217517e+02 ... -7.4994452e+02\n", 355 | " -2.0445712e+02 6.6502631e+02]\n", 356 | " ...\n", 357 | " [ 1.3832479e+00 2.1197855e-02 1.4356872e-01 ... 3.0875385e-02\n", 358 | " 9.9974066e-02 -8.3744198e-02]\n", 359 | " [ 1.4182310e+00 2.9370368e-02 1.3619992e-01 ... 2.7370155e-02\n", 360 | " 1.0397628e-01 -9.1196567e-02]\n", 361 | " [ 1.2922759e+00 6.7051947e-02 1.3771275e-01 ... 1.7371118e-02\n", 362 | " 8.6671740e-02 -6.0075134e-02]]\n", 363 | "evaluation(1062, 10000)\n", 364 | "x_batch(1062, 10000)\n" 365 | ] 366 | }, 367 | { 368 | "data": { 369 | "text/plain": [ 370 | "
" 371 | ] 372 | }, 373 | "metadata": {}, 374 | "output_type": "display_data" 375 | }, 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "aga\n", 381 | "Epoch: 51\n", 382 | "totalsongs(1, 1062, 10000)\n", 383 | "Song loss: 2674.338\n", 384 | "Curr Epoch: 51 Curr Batch: 0/1\n", 385 | "Batch Loss: 2674.338\n", 386 | "Epoch Avg Loss: 2674.338\n", 387 | "Epoch: 52\n", 388 | "totalsongs(1, 1062, 10000)\n", 389 | "Song loss: 2673.1316\n", 390 | "Curr Epoch: 52 Curr Batch: 0/1\n", 391 | "Batch Loss: 2673.1316\n", 392 | "Epoch Avg Loss: 2673.1316\n", 393 | "Epoch: 53\n", 394 | "totalsongs(1, 1062, 10000)\n", 395 | "Song loss: 2671.9539\n", 396 | "Curr Epoch: 53 Curr Batch: 0/1\n", 397 | "Batch Loss: 2671.9539\n", 398 | "Epoch Avg Loss: 2671.9539\n", 399 | "Epoch: 54\n", 400 | "totalsongs(1, 1062, 10000)\n", 401 | "Song loss: 2670.7708\n", 402 | "Curr Epoch: 54 Curr Batch: 0/1\n", 403 | "Batch Loss: 2670.7708\n", 404 | "Epoch Avg Loss: 2670.7708\n", 405 | "Epoch: 55\n", 406 | "totalsongs(1, 1062, 10000)\n", 407 | "Song loss: 2669.581\n", 408 | "Curr Epoch: 55 Curr Batch: 0/1\n", 409 | "Batch Loss: 2669.581\n", 410 | "Epoch Avg Loss: 2669.581\n", 411 | "Epoch: 56\n", 412 | "totalsongs(1, 1062, 10000)\n", 413 | "Song loss: 2668.4133\n", 414 | "Curr Epoch: 56 Curr Batch: 0/1\n", 415 | "Batch Loss: 2668.4133\n", 416 | "Epoch Avg Loss: 2668.4133\n", 417 | "Epoch: 57\n", 418 | "totalsongs(1, 1062, 10000)\n", 419 | "Song loss: 2667.2917\n", 420 | "Curr Epoch: 57 Curr Batch: 0/1\n", 421 | "Batch Loss: 2667.2917\n", 422 | "Epoch Avg Loss: 2667.2917\n", 423 | "Epoch: 58\n", 424 | "totalsongs(1, 1062, 10000)\n", 425 | "Song loss: 2666.2083\n", 426 | "Curr Epoch: 58 Curr Batch: 0/1\n", 427 | "Batch Loss: 2666.2083\n", 428 | "Epoch Avg Loss: 2666.2083\n", 429 | "Epoch: 59\n", 430 | "totalsongs(1, 1062, 10000)\n", 431 | "Song loss: 2665.1345\n", 432 | "Curr Epoch: 59 Curr Batch: 0/1\n", 433 | "Batch Loss: 2665.1345\n", 434 | "Epoch Avg Loss: 2665.1345\n", 435 | "Epoch: 60\n", 436 | "totalsongs(1, 1062, 10000)\n", 437 | "Song loss: 2664.053\n", 438 | "Curr Epoch: 60 Curr Batch: 0/1\n", 439 | "Batch Loss: 2664.053\n", 440 | "Epoch Avg Loss: 2664.053\n", 441 | "Epoch: 61\n", 442 | "totalsongs(1, 1062, 10000)\n", 443 | "Song loss: 2662.9692\n", 444 | "Curr Epoch: 61 Curr Batch: 0/1\n", 445 | "Batch Loss: 2662.9692\n", 446 | "Epoch Avg Loss: 2662.9692\n", 447 | "Epoch: 62\n", 448 | "totalsongs(1, 1062, 10000)\n", 449 | "Song loss: 2661.9136\n", 450 | "Curr Epoch: 62 Curr Batch: 0/1\n", 451 | "Batch Loss: 2661.9136\n", 452 | "Epoch Avg Loss: 2661.9136\n", 453 | "Epoch: 63\n", 454 | "totalsongs(1, 1062, 10000)\n", 455 | "Song loss: 2660.8782\n", 456 | "Curr Epoch: 63 Curr Batch: 0/1\n", 457 | "Batch Loss: 2660.8782\n", 458 | "Epoch Avg Loss: 2660.8782\n", 459 | "Epoch: 64\n", 460 | "totalsongs(1, 1062, 10000)\n", 461 | "Song loss: 2659.87\n", 462 | "Curr Epoch: 64 Curr Batch: 0/1\n", 463 | "Batch Loss: 2659.87\n", 464 | "Epoch Avg Loss: 2659.87\n", 465 | "Epoch: 65\n", 466 | "totalsongs(1, 1062, 10000)\n", 467 | "Song loss: 2658.871\n", 468 | "Curr Epoch: 65 Curr Batch: 0/1\n", 469 | "Batch Loss: 2658.871\n", 470 | "Epoch Avg Loss: 2658.871\n", 471 | "Epoch: 66\n", 472 | "totalsongs(1, 1062, 10000)\n", 473 | "Song loss: 2657.871\n", 474 | "Curr Epoch: 66 Curr Batch: 0/1\n", 475 | "Batch Loss: 2657.871\n", 476 | "Epoch Avg Loss: 2657.871\n", 477 | "Epoch: 67\n", 478 | "totalsongs(1, 1062, 10000)\n", 479 | "Song loss: 2656.8755\n", 480 | "Curr Epoch: 67 Curr Batch: 0/1\n", 481 | "Batch 
Loss: 2656.8755\n",
482 | "Epoch Avg Loss: 2656.8755\n",
483 | "Epoch: 68\n",
484 | "totalsongs(1, 1062, 10000)\n",
485 | "Song loss: 2655.8926\n",
486 | "Curr Epoch: 68 Curr Batch: 0/1\n",
487 | "Batch Loss: 2655.8926\n",
488 | "Epoch Avg Loss: 2655.8926\n",
489 | "Epoch: 69\n",
490 | "totalsongs(1, 1062, 10000)\n",
491 | "Song loss: 2654.9253\n",
492 | "Curr Epoch: 69 Curr Batch: 0/1\n",
493 | "Batch Loss: 2654.9253\n",
494 | "Epoch Avg Loss: 2654.9253\n",
495 | "Epoch: 70\n",
496 | "totalsongs(1, 1062, 10000)\n",
497 | "Song loss: 2653.9653\n",
498 | "Curr Epoch: 70 Curr Batch: 0/1\n",
499 | "Batch Loss: 2653.9653\n",
500 | "Epoch Avg Loss: 2653.9653\n",
501 | "Epoch: 71\n",
502 | "totalsongs(1, 1062, 10000)\n",
503 | "Song loss: 2653.0088\n",
504 | "Curr Epoch: 71 Curr Batch: 0/1\n",
505 | "Batch Loss: 2653.0088\n",
506 | "Epoch Avg Loss: 2653.0088\n",
507 | "Epoch: 72\n",
508 | "totalsongs(1, 1062, 10000)\n",
509 | "Song loss: 2652.0562\n",
510 | "Curr Epoch: 72 Curr Batch: 0/1\n",
511 | "Batch Loss: 2652.0562\n",
512 | "Epoch Avg Loss: 2652.0562\n",
513 | "Epoch: 73\n",
514 | "totalsongs(1, 1062, 10000)\n"
515 | ]
516 | }
517 | ],
518 | "source": [
519 | "#!/usr/bin/env python\n",
520 | "# coding: utf-8\n",
521 | "#@Author: Wajahat Waheed\n",
522 | "import math\n",
523 | "import pickle\n",
524 | "from functools import partial\n",
525 | "from glob import iglob\n",
526 | "\n",
527 | "import numpy as np\n",
528 | "import matplotlib.pyplot as plt\n",
529 | "import tensorflow as tf\n",
530 | "from tensorflow.contrib.framework.python.ops import audio_ops\n",
531 | "from scipy.fftpack import rfft, irfft\n",
532 | "from pydub import AudioSegment\n",
533 | "\n",
534 | "\n",
535 | "DATA_FILES_MP3 = 'audio'\n",
536 | "DATA_FILES_WAV = r'F:\\tfaudio\\audiowav' # this should point at your directory of training .wav files\n",
537 | "file_arr = []\n",
538 | "curr_batch = 0\n",
539 | "def convert_mp3_to_wav():\n",
540 | "    index = 0\n",
541 | "    for file in iglob(DATA_FILES_MP3 + '/*.mp3'):\n",
542 | "        mp3_to_wav = AudioSegment.from_mp3(file)\n",
543 | "        mp3_to_wav.export(DATA_FILES_WAV + '/' + str(index) + '.wav', format='wav')\n",
544 | "        index += 1\n",
545 | "def process_wav():\n",
546 | "    # collect the paths of all training wav files\n",
547 | "    for file in iglob(DATA_FILES_WAV + r'\\*.wav'):\n",
548 | "        file_arr.append(file)\n",
549 | "def get_next_batch(curr_batch, songs_per_batch, sess):\n",
550 | "    wav_arr_ch1 = []\n",
551 | "    wav_arr_ch2 = []\n",
552 | "    if curr_batch >= len(file_arr):\n",
553 | "        curr_batch = 0\n",
554 | "    start_position = curr_batch * songs_per_batch\n",
555 | "    end_position = start_position + songs_per_batch\n",
556 | "    print(start_position, end_position)\n",
557 | "    for idx in range(start_position, end_position):\n",
558 | "        print(\"See this:\", file_arr[idx])\n",
559 | "        audio_binary = tf.read_file(file_arr[idx])\n",
560 | "        wav_decoder = audio_ops.decode_wav(audio_binary, desired_channels=2)\n",
561 | "        sample_rate, audio = sess.run([wav_decoder.sample_rate, wav_decoder.audio])\n",
562 | "        audio = np.array(audio)\n",
563 | "        print('Audio shape', audio.shape)\n",
564 | "        if len(audio[:, 0]) != 5314197:  # skip files that don't match the expected sample count\n",
565 | "            print(\"Skipping file with unexpected length\")\n",
566 | "            continue\n",
567 | "        wav_arr_ch1.append(rfft(audio[:, 0]))\n",
568 | "        wav_arr_ch2.append(rfft(audio[:, 1]))\n",
569 | "        print(\"Returning File: \" + file_arr[idx])\n",
570 | "    return wav_arr_ch1, wav_arr_ch2, sample_rate\n",
571 | "\n",
572 | "def save_to_wav(enc_song_ch1, enc_song_ch2, 
audio_arr_ch1, audio_arr_ch2, sample_rate, original_song_ch1, original_song_ch2, idty, folder, sess, dump=False):\n",
573 | "    #sample_rate = 44100\n",
574 | "    audio_arr_ch1 = irfft(np.hstack(np.hstack(audio_arr_ch1)))\n",
575 | "    audio_arr_ch2 = irfft(np.hstack(np.hstack(audio_arr_ch2)))\n",
576 | "\n",
577 | "    original_song_ch1 = irfft(np.hstack(np.hstack(original_song_ch1)))\n",
578 | "    original_song_ch2 = irfft(np.hstack(np.hstack(original_song_ch2)))\n",
579 | "\n",
580 | "    enc_song_ch1 = irfft(np.hstack(np.hstack(enc_song_ch1)))\n",
581 | "    enc_song_ch2 = irfft(np.hstack(np.hstack(enc_song_ch2)))\n",
582 | "\n",
583 | "    original_song = np.hstack(np.array((original_song_ch1, original_song_ch2)).T)\n",
584 | "    audio_arr = np.hstack(np.array((audio_arr_ch1, audio_arr_ch2)).T)\n",
585 | "    enc_song = np.hstack(np.array((enc_song_ch1, enc_song_ch2)).T)\n",
586 | "    w = np.linspace(0, sample_rate, len(audio_arr))\n",
587 | "    w = w[0:len(audio_arr)]\n",
588 | "\n",
589 | "    plt.figure(1)\n",
590 | "    plt.plot(w, original_song)\n",
591 | "    plt.savefig(str(folder) + '/original.png')\n",
592 | "    plt.plot(w, audio_arr)\n",
593 | "    plt.xlabel('sample')\n",
594 | "    plt.ylabel('amplitude')\n",
595 | "    plt.savefig(str(folder) + '/compressed' + str(idty) + '.png')\n",
596 | "    #plt.clf()\n",
597 | "    plt.show()\n",
598 | "    cols = 2\n",
599 | "    rows = math.floor(len(audio_arr)/2)\n",
600 | "    audio_arr = audio_arr.reshape(rows, cols)\n",
601 | "    original_song = original_song.reshape(rows, cols)\n",
602 | "    enc_song = enc_song.reshape(math.floor(len(enc_song)/2), cols)\n",
603 | "    if dump:\n",
604 | "        from scipy.io import wavfile\n",
605 | "        print('aga')\n",
606 | "        wavfile.write('Reconstructedsongforcontin' + str(idty) + '.wav', sample_rate, audio_arr)\n",
607 | "        wavfile.write('original_songforcontin.wav', sample_rate, original_song)\n",
608 | "        pickle.dump(enc_song, open(\"enc_song.p\", \"wb\"))\n",
609 | "\n",
610 | "def next_batch(c_batch, batch_size, sess):\n",
611 | "    ch1_arr = []\n",
612 | "    ch2_arr = []\n",
613 | "    wav_arr_ch1, wav_arr_ch2, sample_rate = get_next_batch(c_batch, batch_size, sess)\n",
614 | "\n",
615 | "    for sub_arr in wav_arr_ch1:\n",
616 | "        batch_size_ch1 = math.floor(len(sub_arr)/inputs)\n",
617 | "        sub_arr = sub_arr[:(batch_size_ch1*inputs)]\n",
618 | "        ch1_arr.append(np.array(sub_arr).reshape(batch_size_ch1, inputs))\n",
619 | "\n",
620 | "    for sub_arr in wav_arr_ch2:\n",
621 | "        batch_size_ch2 = math.floor(len(sub_arr)/inputs)\n",
622 | "        sub_arr = sub_arr[:(batch_size_ch2*inputs)]\n",
623 | "        ch2_arr.append(np.array(sub_arr).reshape(batch_size_ch2, inputs))\n",
624 | "\n",
625 | "    return np.array(ch1_arr), np.array(ch2_arr), sample_rate\n",
626 | "\n",
627 | "#LOSS_OUT_FILE = 'Epoch_Loss.txt'\n",
628 | "process_wav()\n",
629 | "inputs = 10000\n",
630 | "# Learning rate\n",
631 | "lr = 0.00005\n",
632 | "# L2 regularization\n",
633 | "l2 = 0.00005\n",
634 | "hidden_1_size = 7000\n",
635 | "hidden_2_size = 4440\n",
636 | "hidden_3_size = 1800  # bottleneck width: each 10000-sample frame is encoded to 1800 floats\n",
637 | "epochs = 50000\n",
638 | "batch_size = 1\n",
639 | "batches = 1\n",
640 | "X = tf.placeholder(tf.float32, shape=[None, inputs])\n",
641 | "l2_regularizer = tf.contrib.layers.l2_regularizer(l2)\n",
642 | "#saver = tf.train.Saver()\n",
643 | "autoencoder_dnn = partial(tf.layers.dense, activation=tf.nn.elu, kernel_initializer=tf.contrib.layers.variance_scaling_initializer(), kernel_regularizer=l2_regularizer)\n",
644 | "hidden_1 = autoencoder_dnn(X, hidden_1_size)\n",
645 | "hidden_2 = 
autoencoder_dnn(hidden_1, hidden_2_size)\n",
646 | "hidden_3 = autoencoder_dnn(hidden_2, hidden_3_size)  # bottleneck: the compressed representation\n",
647 | "hidden_4 = autoencoder_dnn(hidden_3, hidden_2_size)\n",
648 | "outputs = autoencoder_dnn(hidden_4, inputs, activation=None)\n",
649 | "#encoder = autoencoder_dnn(inputs, hidden_3, activation=None)\n",
650 | "#decoder = autoencoder_dnn(hidden_3, inputs, activation=None)\n",
651 | "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))\n",
652 | "reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n",
653 | "loss = tf.add_n([reconstruction_loss] + reg_loss)\n",
654 | "optimizer = tf.train.AdamOptimizer(lr)\n",
655 | "training_op = optimizer.minimize(loss)\n",
656 | "init = tf.global_variables_initializer()\n",
657 | "saver = tf.train.Saver()\n",
658 | "\n",
659 | "##### Run training\n",
660 | "with tf.Session() as sess:\n",
661 | "    init.run()\n",
662 | "    saver.restore(sess, \"./model.ckpt\")  # resume from the saved checkpoint\n",
663 | "    ch1_song, ch2_song, sample_rate = next_batch(4, batch_size, sess)\n",
664 | "    for epoch in range(epochs):\n",
665 | "        epoch_loss = []\n",
666 | "        print(\"Epoch: \" + str(epoch))\n",
667 | "        for i in range(batches):\n",
668 | "            # print(\"ch1_song\",ch1_song)\n",
669 | "            total_songs = np.hstack([ch1_song, ch2_song])\n",
670 | "            print('totalsongs' + str(total_songs.shape))\n",
671 | "            batch_loss = []\n",
672 | "            for j in range(len(total_songs)):\n",
673 | "                x_batch = total_songs[j]\n",
674 | "                _, l = sess.run([training_op, loss], feed_dict={X: x_batch})\n",
675 | "                batch_loss.append(l)\n",
676 | "                print(\"Song loss: \" + str(l))\n",
677 | "\n",
678 | "            print(\"Curr Epoch: \" + str(epoch) + \" Curr Batch: \" + str(i) + \"/\" + str(batches))\n",
679 | "            print(\"Batch Loss: \" + str(np.mean(batch_loss)))\n",
680 | "            epoch_loss.append(np.mean(batch_loss))\n",
681 | "            print(\"Epoch Avg Loss: \" + str(np.mean(epoch_loss)))\n",
682 | "        if epoch == 50:\n",
683 | "            ch1_song_new, ch2_song_new, sample_rate_new = ch1_song, ch2_song, sample_rate\n",
684 | "            #ch1_song_new, ch2_song_new, sample_rate_new = next_batch(0, 1, sess)\n",
685 | "\n",
686 | "            # print(\"ch1\",ch1_song_new)\n",
687 | "            # print(\"ch2\",ch2_song_new)\n",
688 | "            x_batch = np.hstack([ch1_song_new, ch2_song_new])[0]\n",
689 | "            print(\"Sample rate: \" + str(sample_rate_new))\n",
690 | "\n",
691 | "            orig_song = []\n",
692 | "            full_song = []\n",
693 | "            encoded_song = []\n",
694 | "            encoded = hidden_3.eval(feed_dict={X: x_batch})  # bottleneck activations\n",
695 | "            print('encodedshape' + str(encoded.shape))\n",
696 | "            evaluation = outputs.eval(feed_dict={X: x_batch})\n",
697 | "            print(\"Output: \" + str(evaluation))\n",
698 | "            print('evaluation' + str(evaluation.shape))\n",
699 | "            print('x_batch' + str(x_batch.shape))\n",
700 | "            encoded_song.append(encoded)\n",
701 | "            full_song.append(evaluation)\n",
702 | "            orig_song.append(x_batch)\n",
703 | "\n",
704 | "            # Merge the nested arrays\n",
705 | "            orig_song = np.hstack(orig_song)\n",
706 | "            full_song = np.hstack(full_song)\n",
707 | "            encoded_song = np.hstack(encoded_song)\n",
708 | "\n",
709 | "            # Compute and split the channels\n",
710 | "            orig_song_ch1 = orig_song[:math.floor(len(orig_song)/2)]\n",
711 | "            orig_song_ch2 = orig_song[math.floor(len(orig_song)/2):]\n",
712 | "            full_song_ch1 = full_song[:math.floor(len(full_song)/2)]\n",
713 | "            full_song_ch2 = full_song[math.floor(len(full_song)/2):]\n",
714 | "            enc_song_ch1 = encoded_song[:math.floor(len(encoded_song)/2)]\n",
715 | "            enc_song_ch2 = encoded_song[math.floor(len(encoded_song)/2):]\n",
716 | "\n",
717 | "            # Save both the 
untouched song and the reconstructed song to the 'output' folder\n",
718 | "            save_to_wav(enc_song_ch1, enc_song_ch2, full_song_ch1, full_song_ch2, sample_rate, orig_song_ch1, orig_song_ch2, epoch, 'output', sess, True)\n",
719 | "            #saver.save(sess, './model.ckpt')"
720 | ]
721 | }
722 | ],
723 | "metadata": {
724 | "kernelspec": {
725 | "display_name": "Python 3",
726 | "language": "python",
727 | "name": "python3"
728 | },
729 | "language_info": {
730 | "codemirror_mode": {
731 | "name": "ipython",
732 | "version": 3
733 | },
734 | "file_extension": ".py",
735 | "mimetype": "text/x-python",
736 | "name": "python",
737 | "nbconvert_exporter": "python",
738 | "pygments_lexer": "ipython3",
739 | "version": "3.7.3"
740 | }
741 | },
742 | "nbformat": 4,
743 | "nbformat_minor": 2
744 | }
745 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AudioCompression using AutoEncoder
2 | # Machine learning is replacing older techniques in almost every field,
3 | # so I thought: why not try machine learning to compress audio?
4 | 
5 | The number of nodes in each layer can be tweaked; you can compress audio up to 23 times or even more (I haven't pushed it that far myself, but you certainly can)!
6 | 
7 | Your training dataset for the autoencoder model needs to be .wav files, since the libraries used in the code do not handle other formats well.
8 | 
9 | # Note that this compression is meant for data transfer and transmission, not storage: the encoder produces a pickle file that you decode on the receiving end (both steps are included in the code).
10 | 
11 | Code for both encoding and decoding is available in the Jupyter notebook. Happy compressing!
12 | 
13 | If you have trouble getting a dataset, feel free to reach out to me at waheed@pnw.edu.
14 | 
15 | This project was supervised by Professor Orhan Arikan, Department Chair of Bilkent University's EEE department.
--------------------------------------------------------------------------------
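# Where the compression ratio comes from (a quick sketch)

The ratio falls directly out of the layer sizes hard-coded in the notebook: each 10000-sample frame (`inputs = 10000`) is encoded down to 1800 floats (the bottleneck size `hidden_3_size = 1800`). Below is a minimal back-of-the-envelope; the 435-unit bottleneck needed for ~23x is a hypothetical value, not something tested here:

```python
inputs = 10000       # samples per frame fed to the autoencoder
bottleneck = 1800    # bottleneck width as shipped in the notebook

print(inputs / bottleneck)  # ~5.56x compression per frame

# Reaching the ~23x mentioned above means shrinking the bottleneck:
hypothetical_bottleneck = 435  # untested: 10000 / 435 is roughly 23
print(inputs / hypothetical_bottleneck)
```

Note this counts floats in versus floats out; the bytes actually shipped also depend on the dtype you pickle (float32 vs. float64).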
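# Decoding on the receiving end (a minimal sketch)

As written, the notebook pickles a post-processed copy of the encoding (`save_to_wav` runs `irfft` and a reshape before dumping `enc_song.p`). For transmission you would more likely pickle the raw bottleneck activations (the `(1062, 1800)` array printed as `encodedshape` above) and ship the `model.ckpt` weights to the receiver once. Assuming exactly that, the decode side could look like the following sketch; it rebuilds the training graph in the same order so `tf.layers.dense` assigns the same variable names the checkpoint was saved under, then feeds the encoding straight into the bottleneck tensor:

```python
import pickle
from functools import partial
import tensorflow as tf

inputs = 10000
hidden_1_size, hidden_2_size, hidden_3_size = 7000, 4440, 1800

# Same layers, same creation order, so variable names match the checkpoint.
dense = partial(tf.layers.dense, activation=tf.nn.elu,
                kernel_initializer=tf.contrib.layers.variance_scaling_initializer())
X = tf.placeholder(tf.float32, shape=[None, inputs])
hidden_1 = dense(X, hidden_1_size)
hidden_2 = dense(hidden_1, hidden_2_size)
hidden_3 = dense(hidden_2, hidden_3_size)   # bottleneck
hidden_4 = dense(hidden_3, hidden_2_size)
outputs = dense(hidden_4, inputs, activation=None)

saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, "./model.ckpt")
    with open("enc_song.p", "rb") as f:
        encoded = pickle.load(f)  # assumed raw bottleneck, shape (n_frames, 1800)
    # TF1 lets you feed any tensor, so inject the encoding at the bottleneck
    # and run only the decoder half of the graph:
    frames = sess.run(outputs, feed_dict={hidden_3: encoded})
    # 'frames' holds reconstructed rfft frames of shape (n_frames, 10000);
    # undo the rfft and re-interleave the channels as save_to_wav() does.
```

Feeding `hidden_3` directly is the point of the scheme: the receiver never needs the original audio frames, only the 1800-float encodings plus a one-time copy of the shared weights.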