├── AudioCompression_using_AutoEncoder_Machine_Learning.ipynb └── README.md /AudioCompression_using_AutoEncoder_Machine_Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "WARNING: Logging before flag parsing goes to stderr.\n", 13 | "W0113 00:53:51.602934 33696 deprecation.py:323] From :176: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n", 14 | "Instructions for updating:\n", 15 | "Use keras.layers.dense instead.\n" 16 | ] 17 | }, 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "I GOT HERE\n" 23 | ] 24 | }, 25 | { 26 | "name": "stderr", 27 | "output_type": "stream", 28 | "text": [ 29 | "W0113 00:54:11.267351 33696 deprecation.py:323] From c:\\anaconda3\\lib\\site-packages\\tensorflow\\python\\training\\saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\n", 30 | "Instructions for updating:\n", 31 | "Use standard file APIs to check for files with this prefix.\n" 32 | ] 33 | }, 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "4 5\n", 39 | "See this: F:\\tfaudio\\audiowav\\r (15).wav\n", 40 | "audio shape (5314197, 2)\n", 41 | "Returning File: F:\\tfaudio\\audiowav\\r (15).wav\n", 42 | "Epoch: 0\n", 43 | "totalsongs(1, 1062, 10000)\n", 44 | "Song loss: 2897.7048\n", 45 | "Curr Epoch: 0 Curr Batch: 0/1\n", 46 | "Batch Loss: 2897.7048\n", 47 | "Epoch Avg Loss: 2897.7048\n", 48 | "Epoch: 1\n", 49 | "totalsongs(1, 1062, 10000)\n", 50 | "Song loss: 2868.833\n", 51 | "Curr Epoch: 1 Curr Batch: 0/1\n", 52 | "Batch Loss: 2868.833\n", 53 | "Epoch Avg Loss: 2868.833\n", 54 | "Epoch: 2\n", 55 | "totalsongs(1, 1062, 10000)\n", 56 | "Song loss: 2834.8965\n", 57 | "Curr Epoch: 2 Curr Batch: 0/1\n", 58 | "Batch Loss: 2834.8965\n", 59 | "Epoch Avg Loss: 2834.8965\n", 60 | "Epoch: 3\n", 61 | "totalsongs(1, 1062, 10000)\n", 62 | "Song loss: 2814.4148\n", 63 | "Curr Epoch: 3 Curr Batch: 0/1\n", 64 | "Batch Loss: 2814.4148\n", 65 | "Epoch Avg Loss: 2814.4148\n", 66 | "Epoch: 4\n", 67 | "totalsongs(1, 1062, 10000)\n", 68 | "Song loss: 2808.1172\n", 69 | "Curr Epoch: 4 Curr Batch: 0/1\n", 70 | "Batch Loss: 2808.1172\n", 71 | "Epoch Avg Loss: 2808.1172\n", 72 | "Epoch: 5\n", 73 | "totalsongs(1, 1062, 10000)\n", 74 | "Song loss: 2805.7178\n", 75 | "Curr Epoch: 5 Curr Batch: 0/1\n", 76 | "Batch Loss: 2805.7178\n", 77 | "Epoch Avg Loss: 2805.7178\n", 78 | "Epoch: 6\n", 79 | "totalsongs(1, 1062, 10000)\n", 80 | "Song loss: 2800.1287\n", 81 | "Curr Epoch: 6 Curr Batch: 0/1\n", 82 | "Batch Loss: 2800.1287\n", 83 | "Epoch Avg Loss: 2800.1287\n", 84 | "Epoch: 7\n", 85 | "totalsongs(1, 1062, 10000)\n", 86 | "Song loss: 2791.9536\n", 87 | "Curr Epoch: 7 Curr Batch: 0/1\n", 88 | "Batch Loss: 2791.9536\n", 89 | "Epoch Avg Loss: 2791.9536\n", 90 | "Epoch: 8\n", 91 | "totalsongs(1, 1062, 10000)\n", 92 | "Song loss: 2785.9773\n", 93 | "Curr Epoch: 8 Curr Batch: 0/1\n", 94 | "Batch Loss: 2785.9773\n", 95 | "Epoch Avg Loss: 2785.9773\n", 96 | "Epoch: 9\n", 97 | "totalsongs(1, 1062, 10000)\n", 98 | "Song loss: 2784.0908\n", 99 | "Curr Epoch: 9 Curr Batch: 0/1\n", 100 | "Batch Loss: 2784.0908\n", 101 | "Epoch Avg Loss: 2784.0908\n", 102 | "Epoch: 10\n", 103 | "totalsongs(1, 1062, 10000)\n", 
104 | "Song loss: 2783.079\n", 105 | "Curr Epoch: 10 Curr Batch: 0/1\n", 106 | "Batch Loss: 2783.079\n", 107 | "Epoch Avg Loss: 2783.079\n", 108 | "Epoch: 11\n", 109 | "totalsongs(1, 1062, 10000)\n", 110 | "Song loss: 2779.0698\n", 111 | "Curr Epoch: 11 Curr Batch: 0/1\n", 112 | "Batch Loss: 2779.0698\n", 113 | "Epoch Avg Loss: 2779.0698\n", 114 | "Epoch: 12\n", 115 | "totalsongs(1, 1062, 10000)\n", 116 | "Song loss: 2772.0159\n", 117 | "Curr Epoch: 12 Curr Batch: 0/1\n", 118 | "Batch Loss: 2772.0159\n", 119 | "Epoch Avg Loss: 2772.0159\n", 120 | "Epoch: 13\n", 121 | "totalsongs(1, 1062, 10000)\n", 122 | "Song loss: 2764.721\n", 123 | "Curr Epoch: 13 Curr Batch: 0/1\n", 124 | "Batch Loss: 2764.721\n", 125 | "Epoch Avg Loss: 2764.721\n", 126 | "Epoch: 14\n", 127 | "totalsongs(1, 1062, 10000)\n", 128 | "Song loss: 2759.0554\n", 129 | "Curr Epoch: 14 Curr Batch: 0/1\n", 130 | "Batch Loss: 2759.0554\n", 131 | "Epoch Avg Loss: 2759.0554\n", 132 | "Epoch: 15\n", 133 | "totalsongs(1, 1062, 10000)\n", 134 | "Song loss: 2754.6016\n", 135 | "Curr Epoch: 15 Curr Batch: 0/1\n", 136 | "Batch Loss: 2754.6016\n", 137 | "Epoch Avg Loss: 2754.6016\n", 138 | "Epoch: 16\n", 139 | "totalsongs(1, 1062, 10000)\n", 140 | "Song loss: 2750.2036\n", 141 | "Curr Epoch: 16 Curr Batch: 0/1\n", 142 | "Batch Loss: 2750.2036\n", 143 | "Epoch Avg Loss: 2750.2036\n", 144 | "Epoch: 17\n", 145 | "totalsongs(1, 1062, 10000)\n", 146 | "Song loss: 2745.731\n", 147 | "Curr Epoch: 17 Curr Batch: 0/1\n", 148 | "Batch Loss: 2745.731\n", 149 | "Epoch Avg Loss: 2745.731\n", 150 | "Epoch: 18\n", 151 | "totalsongs(1, 1062, 10000)\n", 152 | "Song loss: 2741.9744\n", 153 | "Curr Epoch: 18 Curr Batch: 0/1\n", 154 | "Batch Loss: 2741.9744\n", 155 | "Epoch Avg Loss: 2741.9744\n", 156 | "Epoch: 19\n", 157 | "totalsongs(1, 1062, 10000)\n", 158 | "Song loss: 2739.3975\n", 159 | "Curr Epoch: 19 Curr Batch: 0/1\n", 160 | "Batch Loss: 2739.3975\n", 161 | "Epoch Avg Loss: 2739.3975\n", 162 | "Epoch: 20\n", 163 | "totalsongs(1, 1062, 10000)\n", 164 | "Song loss: 2737.3164\n", 165 | "Curr Epoch: 20 Curr Batch: 0/1\n", 166 | "Batch Loss: 2737.3164\n", 167 | "Epoch Avg Loss: 2737.3164\n", 168 | "Epoch: 21\n", 169 | "totalsongs(1, 1062, 10000)\n", 170 | "Song loss: 2734.51\n", 171 | "Curr Epoch: 21 Curr Batch: 0/1\n", 172 | "Batch Loss: 2734.51\n", 173 | "Epoch Avg Loss: 2734.51\n", 174 | "Epoch: 22\n", 175 | "totalsongs(1, 1062, 10000)\n", 176 | "Song loss: 2730.5425\n", 177 | "Curr Epoch: 22 Curr Batch: 0/1\n", 178 | "Batch Loss: 2730.5425\n", 179 | "Epoch Avg Loss: 2730.5425\n", 180 | "Epoch: 23\n", 181 | "totalsongs(1, 1062, 10000)\n", 182 | "Song loss: 2726.2256\n", 183 | "Curr Epoch: 23 Curr Batch: 0/1\n", 184 | "Batch Loss: 2726.2256\n", 185 | "Epoch Avg Loss: 2726.2256\n", 186 | "Epoch: 24\n", 187 | "totalsongs(1, 1062, 10000)\n", 188 | "Song loss: 2722.7183\n", 189 | "Curr Epoch: 24 Curr Batch: 0/1\n", 190 | "Batch Loss: 2722.7183\n", 191 | "Epoch Avg Loss: 2722.7183\n", 192 | "Epoch: 25\n", 193 | "totalsongs(1, 1062, 10000)\n", 194 | "Song loss: 2720.3591\n", 195 | "Curr Epoch: 25 Curr Batch: 0/1\n", 196 | "Batch Loss: 2720.3591\n", 197 | "Epoch Avg Loss: 2720.3591\n", 198 | "Epoch: 26\n", 199 | "totalsongs(1, 1062, 10000)\n", 200 | "Song loss: 2718.4792\n", 201 | "Curr Epoch: 26 Curr Batch: 0/1\n", 202 | "Batch Loss: 2718.4792\n", 203 | "Epoch Avg Loss: 2718.4792\n", 204 | "Epoch: 27\n", 205 | "totalsongs(1, 1062, 10000)\n", 206 | "Song loss: 2716.2288\n", 207 | "Curr Epoch: 27 Curr Batch: 0/1\n", 208 | "Batch Loss: 2716.2288\n", 209 | 
"Epoch Avg Loss: 2716.2288\n", 210 | "Epoch: 28\n", 211 | "totalsongs(1, 1062, 10000)\n", 212 | "Song loss: 2713.4785\n", 213 | "Curr Epoch: 28 Curr Batch: 0/1\n", 214 | "Batch Loss: 2713.4785\n", 215 | "Epoch Avg Loss: 2713.4785\n", 216 | "Epoch: 29\n", 217 | "totalsongs(1, 1062, 10000)\n", 218 | "Song loss: 2710.7495\n", 219 | "Curr Epoch: 29 Curr Batch: 0/1\n", 220 | "Batch Loss: 2710.7495\n", 221 | "Epoch Avg Loss: 2710.7495\n", 222 | "Epoch: 30\n", 223 | "totalsongs(1, 1062, 10000)\n", 224 | "Song loss: 2708.5298\n", 225 | "Curr Epoch: 30 Curr Batch: 0/1\n", 226 | "Batch Loss: 2708.5298\n", 227 | "Epoch Avg Loss: 2708.5298\n", 228 | "Epoch: 31\n", 229 | "totalsongs(1, 1062, 10000)\n", 230 | "Song loss: 2706.7776\n", 231 | "Curr Epoch: 31 Curr Batch: 0/1\n", 232 | "Batch Loss: 2706.7776\n", 233 | "Epoch Avg Loss: 2706.7776\n", 234 | "Epoch: 32\n", 235 | "totalsongs(1, 1062, 10000)\n", 236 | "Song loss: 2705.042\n", 237 | "Curr Epoch: 32 Curr Batch: 0/1\n", 238 | "Batch Loss: 2705.042\n", 239 | "Epoch Avg Loss: 2705.042\n", 240 | "Epoch: 33\n", 241 | "totalsongs(1, 1062, 10000)\n", 242 | "Song loss: 2702.984\n", 243 | "Curr Epoch: 33 Curr Batch: 0/1\n", 244 | "Batch Loss: 2702.984\n", 245 | "Epoch Avg Loss: 2702.984\n", 246 | "Epoch: 34\n", 247 | "totalsongs(1, 1062, 10000)\n", 248 | "Song loss: 2700.694\n", 249 | "Curr Epoch: 34 Curr Batch: 0/1\n", 250 | "Batch Loss: 2700.694\n", 251 | "Epoch Avg Loss: 2700.694\n", 252 | "Epoch: 35\n", 253 | "totalsongs(1, 1062, 10000)\n", 254 | "Song loss: 2698.5034\n", 255 | "Curr Epoch: 35 Curr Batch: 0/1\n", 256 | "Batch Loss: 2698.5034\n", 257 | "Epoch Avg Loss: 2698.5034\n", 258 | "Epoch: 36\n", 259 | "totalsongs(1, 1062, 10000)\n", 260 | "Song loss: 2696.6104\n", 261 | "Curr Epoch: 36 Curr Batch: 0/1\n", 262 | "Batch Loss: 2696.6104\n", 263 | "Epoch Avg Loss: 2696.6104\n", 264 | "Epoch: 37\n", 265 | "totalsongs(1, 1062, 10000)\n", 266 | "Song loss: 2694.9312\n", 267 | "Curr Epoch: 37 Curr Batch: 0/1\n", 268 | "Batch Loss: 2694.9312\n", 269 | "Epoch Avg Loss: 2694.9312\n", 270 | "Epoch: 38\n", 271 | "totalsongs(1, 1062, 10000)\n", 272 | "Song loss: 2693.271\n", 273 | "Curr Epoch: 38 Curr Batch: 0/1\n", 274 | "Batch Loss: 2693.271\n", 275 | "Epoch Avg Loss: 2693.271\n", 276 | "Epoch: 39\n", 277 | "totalsongs(1, 1062, 10000)\n", 278 | "Song loss: 2691.544\n", 279 | "Curr Epoch: 39 Curr Batch: 0/1\n", 280 | "Batch Loss: 2691.544\n", 281 | "Epoch Avg Loss: 2691.544\n", 282 | "Epoch: 40\n", 283 | "totalsongs(1, 1062, 10000)\n", 284 | "Song loss: 2689.8262\n", 285 | "Curr Epoch: 40 Curr Batch: 0/1\n", 286 | "Batch Loss: 2689.8262\n", 287 | "Epoch Avg Loss: 2689.8262\n", 288 | "Epoch: 41\n", 289 | "totalsongs(1, 1062, 10000)\n", 290 | "Song loss: 2688.2156\n", 291 | "Curr Epoch: 41 Curr Batch: 0/1\n", 292 | "Batch Loss: 2688.2156\n", 293 | "Epoch Avg Loss: 2688.2156\n", 294 | "Epoch: 42\n", 295 | "totalsongs(1, 1062, 10000)\n", 296 | "Song loss: 2686.7153\n", 297 | "Curr Epoch: 42 Curr Batch: 0/1\n", 298 | "Batch Loss: 2686.7153\n", 299 | "Epoch Avg Loss: 2686.7153\n", 300 | "Epoch: 43\n", 301 | "totalsongs(1, 1062, 10000)\n", 302 | "Song loss: 2685.2532\n", 303 | "Curr Epoch: 43 Curr Batch: 0/1\n", 304 | "Batch Loss: 2685.2532\n", 305 | "Epoch Avg Loss: 2685.2532\n", 306 | "Epoch: 44\n", 307 | "totalsongs(1, 1062, 10000)\n", 308 | "Song loss: 2683.7788\n", 309 | "Curr Epoch: 44 Curr Batch: 0/1\n", 310 | "Batch Loss: 2683.7788\n", 311 | "Epoch Avg Loss: 2683.7788\n", 312 | "Epoch: 45\n", 313 | "totalsongs(1, 1062, 10000)\n", 314 | "Song loss: 
2682.3123\n", 315 | "Curr Epoch: 45 Curr Batch: 0/1\n", 316 | "Batch Loss: 2682.3123\n", 317 | "Epoch Avg Loss: 2682.3123\n", 318 | "Epoch: 46\n", 319 | "totalsongs(1, 1062, 10000)\n", 320 | "Song loss: 2680.8975\n", 321 | "Curr Epoch: 46 Curr Batch: 0/1\n", 322 | "Batch Loss: 2680.8975\n", 323 | "Epoch Avg Loss: 2680.8975\n", 324 | "Epoch: 47\n", 325 | "totalsongs(1, 1062, 10000)\n", 326 | "Song loss: 2679.558\n", 327 | "Curr Epoch: 47 Curr Batch: 0/1\n", 328 | "Batch Loss: 2679.558\n", 329 | "Epoch Avg Loss: 2679.558\n", 330 | "Epoch: 48\n", 331 | "totalsongs(1, 1062, 10000)\n", 332 | "Song loss: 2678.2349\n", 333 | "Curr Epoch: 48 Curr Batch: 0/1\n", 334 | "Batch Loss: 2678.2349\n", 335 | "Epoch Avg Loss: 2678.2349\n", 336 | "Epoch: 49\n", 337 | "totalsongs(1, 1062, 10000)\n", 338 | "Song loss: 2676.9092\n", 339 | "Curr Epoch: 49 Curr Batch: 0/1\n", 340 | "Batch Loss: 2676.9092\n", 341 | "Epoch Avg Loss: 2676.9092\n", 342 | "Epoch: 50\n", 343 | "totalsongs(1, 1062, 10000)\n", 344 | "Song loss: 2675.6008\n", 345 | "Curr Epoch: 50 Curr Batch: 0/1\n", 346 | "Batch Loss: 2675.6008\n", 347 | "Epoch Avg Loss: 2675.6008\n", 348 | "Sample rate: 44100\n", 349 | "encodedshape(1062, 1800)\n", 350 | "Output: [[ 7.3477707e+01 6.4577866e+01 1.0814214e+01 ... 2.3098246e+04\n", 351 | " 1.1645315e+04 -9.3722504e+02]\n", 352 | " [-8.3661152e+03 2.1774602e+03 7.2459775e+03 ... -1.6125327e+02\n", 353 | " 1.0913843e+03 4.0661896e+02]\n", 354 | " [-6.2134350e+01 -1.4534984e+03 7.1217517e+02 ... -7.4994452e+02\n", 355 | " -2.0445712e+02 6.6502631e+02]\n", 356 | " ...\n", 357 | " [ 1.3832479e+00 2.1197855e-02 1.4356872e-01 ... 3.0875385e-02\n", 358 | " 9.9974066e-02 -8.3744198e-02]\n", 359 | " [ 1.4182310e+00 2.9370368e-02 1.3619992e-01 ... 2.7370155e-02\n", 360 | " 1.0397628e-01 -9.1196567e-02]\n", 361 | " [ 1.2922759e+00 6.7051947e-02 1.3771275e-01 ... 1.7371118e-02\n", 362 | " 8.6671740e-02 -6.0075134e-02]]\n", 363 | "evaluation(1062, 10000)\n", 364 | "x_batch(1062, 10000)\n" 365 | ] 366 | }, 367 | { 368 | "data": { 369 | "text/plain": [ 370 | "
" 371 | ] 372 | }, 373 | "metadata": {}, 374 | "output_type": "display_data" 375 | }, 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "aga\n", 381 | "Epoch: 51\n", 382 | "totalsongs(1, 1062, 10000)\n", 383 | "Song loss: 2674.338\n", 384 | "Curr Epoch: 51 Curr Batch: 0/1\n", 385 | "Batch Loss: 2674.338\n", 386 | "Epoch Avg Loss: 2674.338\n", 387 | "Epoch: 52\n", 388 | "totalsongs(1, 1062, 10000)\n", 389 | "Song loss: 2673.1316\n", 390 | "Curr Epoch: 52 Curr Batch: 0/1\n", 391 | "Batch Loss: 2673.1316\n", 392 | "Epoch Avg Loss: 2673.1316\n", 393 | "Epoch: 53\n", 394 | "totalsongs(1, 1062, 10000)\n", 395 | "Song loss: 2671.9539\n", 396 | "Curr Epoch: 53 Curr Batch: 0/1\n", 397 | "Batch Loss: 2671.9539\n", 398 | "Epoch Avg Loss: 2671.9539\n", 399 | "Epoch: 54\n", 400 | "totalsongs(1, 1062, 10000)\n", 401 | "Song loss: 2670.7708\n", 402 | "Curr Epoch: 54 Curr Batch: 0/1\n", 403 | "Batch Loss: 2670.7708\n", 404 | "Epoch Avg Loss: 2670.7708\n", 405 | "Epoch: 55\n", 406 | "totalsongs(1, 1062, 10000)\n", 407 | "Song loss: 2669.581\n", 408 | "Curr Epoch: 55 Curr Batch: 0/1\n", 409 | "Batch Loss: 2669.581\n", 410 | "Epoch Avg Loss: 2669.581\n", 411 | "Epoch: 56\n", 412 | "totalsongs(1, 1062, 10000)\n", 413 | "Song loss: 2668.4133\n", 414 | "Curr Epoch: 56 Curr Batch: 0/1\n", 415 | "Batch Loss: 2668.4133\n", 416 | "Epoch Avg Loss: 2668.4133\n", 417 | "Epoch: 57\n", 418 | "totalsongs(1, 1062, 10000)\n", 419 | "Song loss: 2667.2917\n", 420 | "Curr Epoch: 57 Curr Batch: 0/1\n", 421 | "Batch Loss: 2667.2917\n", 422 | "Epoch Avg Loss: 2667.2917\n", 423 | "Epoch: 58\n", 424 | "totalsongs(1, 1062, 10000)\n", 425 | "Song loss: 2666.2083\n", 426 | "Curr Epoch: 58 Curr Batch: 0/1\n", 427 | "Batch Loss: 2666.2083\n", 428 | "Epoch Avg Loss: 2666.2083\n", 429 | "Epoch: 59\n", 430 | "totalsongs(1, 1062, 10000)\n", 431 | "Song loss: 2665.1345\n", 432 | "Curr Epoch: 59 Curr Batch: 0/1\n", 433 | "Batch Loss: 2665.1345\n", 434 | "Epoch Avg Loss: 2665.1345\n", 435 | "Epoch: 60\n", 436 | "totalsongs(1, 1062, 10000)\n", 437 | "Song loss: 2664.053\n", 438 | "Curr Epoch: 60 Curr Batch: 0/1\n", 439 | "Batch Loss: 2664.053\n", 440 | "Epoch Avg Loss: 2664.053\n", 441 | "Epoch: 61\n", 442 | "totalsongs(1, 1062, 10000)\n", 443 | "Song loss: 2662.9692\n", 444 | "Curr Epoch: 61 Curr Batch: 0/1\n", 445 | "Batch Loss: 2662.9692\n", 446 | "Epoch Avg Loss: 2662.9692\n", 447 | "Epoch: 62\n", 448 | "totalsongs(1, 1062, 10000)\n", 449 | "Song loss: 2661.9136\n", 450 | "Curr Epoch: 62 Curr Batch: 0/1\n", 451 | "Batch Loss: 2661.9136\n", 452 | "Epoch Avg Loss: 2661.9136\n", 453 | "Epoch: 63\n", 454 | "totalsongs(1, 1062, 10000)\n", 455 | "Song loss: 2660.8782\n", 456 | "Curr Epoch: 63 Curr Batch: 0/1\n", 457 | "Batch Loss: 2660.8782\n", 458 | "Epoch Avg Loss: 2660.8782\n", 459 | "Epoch: 64\n", 460 | "totalsongs(1, 1062, 10000)\n", 461 | "Song loss: 2659.87\n", 462 | "Curr Epoch: 64 Curr Batch: 0/1\n", 463 | "Batch Loss: 2659.87\n", 464 | "Epoch Avg Loss: 2659.87\n", 465 | "Epoch: 65\n", 466 | "totalsongs(1, 1062, 10000)\n", 467 | "Song loss: 2658.871\n", 468 | "Curr Epoch: 65 Curr Batch: 0/1\n", 469 | "Batch Loss: 2658.871\n", 470 | "Epoch Avg Loss: 2658.871\n", 471 | "Epoch: 66\n", 472 | "totalsongs(1, 1062, 10000)\n", 473 | "Song loss: 2657.871\n", 474 | "Curr Epoch: 66 Curr Batch: 0/1\n", 475 | "Batch Loss: 2657.871\n", 476 | "Epoch Avg Loss: 2657.871\n", 477 | "Epoch: 67\n", 478 | "totalsongs(1, 1062, 10000)\n", 479 | "Song loss: 2656.8755\n", 480 | "Curr Epoch: 67 Curr Batch: 0/1\n", 481 | "Batch 
Loss: 2656.8755\n",
482 | "Epoch Avg Loss: 2656.8755\n",
483 | "Epoch: 68\n",
484 | "totalsongs(1, 1062, 10000)\n",
485 | "Song loss: 2655.8926\n",
486 | "Curr Epoch: 68 Curr Batch: 0/1\n",
487 | "Batch Loss: 2655.8926\n",
488 | "Epoch Avg Loss: 2655.8926\n",
489 | "Epoch: 69\n",
490 | "totalsongs(1, 1062, 10000)\n",
491 | "Song loss: 2654.9253\n",
492 | "Curr Epoch: 69 Curr Batch: 0/1\n",
493 | "Batch Loss: 2654.9253\n",
494 | "Epoch Avg Loss: 2654.9253\n",
495 | "Epoch: 70\n",
496 | "totalsongs(1, 1062, 10000)\n",
497 | "Song loss: 2653.9653\n",
498 | "Curr Epoch: 70 Curr Batch: 0/1\n",
499 | "Batch Loss: 2653.9653\n",
500 | "Epoch Avg Loss: 2653.9653\n",
501 | "Epoch: 71\n",
502 | "totalsongs(1, 1062, 10000)\n",
503 | "Song loss: 2653.0088\n",
504 | "Curr Epoch: 71 Curr Batch: 0/1\n",
505 | "Batch Loss: 2653.0088\n",
506 | "Epoch Avg Loss: 2653.0088\n",
507 | "Epoch: 72\n",
508 | "totalsongs(1, 1062, 10000)\n",
509 | "Song loss: 2652.0562\n",
510 | "Curr Epoch: 72 Curr Batch: 0/1\n",
511 | "Batch Loss: 2652.0562\n",
512 | "Epoch Avg Loss: 2652.0562\n",
513 | "Epoch: 73\n",
514 | "totalsongs(1, 1062, 10000)\n"
515 | ]
516 | }
517 | ],
518 | "source": [
519 | "#!/usr/bin/env python\n",
520 | "# coding: utf-8\n",
521 | "#@Author: Wajahat Waheed\n",
522 | "import math\n",
523 | "import pickle\n",
524 | "from functools import partial\n",
525 | "from glob import iglob\n",
526 | "\n",
527 | "import numpy as np\n",
528 | "import matplotlib.pyplot as plt\n",
529 | "import tensorflow as tf\n",
530 | "from tensorflow.contrib.framework.python.ops import audio_ops\n",
531 | "from scipy.fftpack import rfft, irfft\n",
532 | "from pydub import AudioSegment\n",
533 | "\n",
534 | "\n",
535 | "DATA_FILES_MP3 = 'audio'\n",
536 | "DATA_FILES_WAV = r'F:\\tfaudio\\audiowav' # this should point at your directory of training .wav files\n",
537 | "file_arr = []\n",
538 | "curr_batch = 0\n",
539 | "def convert_mp3_to_wav():\n",
540 | "    index = 0\n",
541 | "    for file in iglob(DATA_FILES_MP3 + '/*.mp3'):\n",
542 | "        mp3_to_wav = AudioSegment.from_mp3(file)\n",
543 | "        mp3_to_wav.export(DATA_FILES_WAV + '/' + str(index) + '.wav', format='wav')\n",
544 | "        index += 1\n",
545 | "def process_wav():\n",
546 | "    # collect the paths of all training wav files\n",
547 | "    for file in iglob(DATA_FILES_WAV + r'\\*.wav'):\n",
548 | "        file_arr.append(file)\n",
549 | "def get_next_batch(curr_batch, songs_per_batch, sess):\n",
550 | "    wav_arr_ch1 = []\n",
551 | "    wav_arr_ch2 = []\n",
552 | "    if curr_batch >= len(file_arr):\n",
553 | "        curr_batch = 0\n",
554 | "    start_position = curr_batch * songs_per_batch\n",
555 | "    end_position = start_position + songs_per_batch\n",
556 | "    print(start_position, end_position)\n",
557 | "    for idx in range(start_position, end_position):\n",
558 | "        print(\"See this:\", file_arr[idx])\n",
559 | "        audio_binary = tf.read_file(file_arr[idx])\n",
560 | "        wav_decoder = audio_ops.decode_wav(audio_binary, desired_channels=2)\n",
561 | "        sample_rate, audio = sess.run([wav_decoder.sample_rate, wav_decoder.audio])\n",
562 | "        audio = np.array(audio)\n",
563 | "        print('Audio shape', audio.shape)\n",
564 | "        if len(audio[:, 0]) != 5314197:  # skip files that don't match the expected sample count\n",
565 | "            print(\"Skipping file with unexpected length\")\n",
566 | "            continue\n",
567 | "        wav_arr_ch1.append(rfft(audio[:, 0]))\n",
568 | "        wav_arr_ch2.append(rfft(audio[:, 1]))\n",
569 | "        print(\"Returning File: \" + file_arr[idx])\n",
570 | "    return wav_arr_ch1, wav_arr_ch2, sample_rate\n",
571 | "\n",
572 | "def save_to_wav(enc_song_ch1, enc_song_ch2, 
audio_arr_ch1, audio_arr_ch2, sample_rate, original_song_ch1, original_song_ch2, idty, folder, sess, dump=False):\n",
573 | "    #sample_rate = 44100\n",
574 | "    audio_arr_ch1 = irfft(np.hstack(np.hstack(audio_arr_ch1)))\n",
575 | "    audio_arr_ch2 = irfft(np.hstack(np.hstack(audio_arr_ch2)))\n",
576 | "\n",
577 | "    original_song_ch1 = irfft(np.hstack(np.hstack(original_song_ch1)))\n",
578 | "    original_song_ch2 = irfft(np.hstack(np.hstack(original_song_ch2)))\n",
579 | "\n",
580 | "    enc_song_ch1 = irfft(np.hstack(np.hstack(enc_song_ch1)))\n",
581 | "    enc_song_ch2 = irfft(np.hstack(np.hstack(enc_song_ch2)))\n",
582 | "\n",
583 | "    original_song = np.hstack(np.array((original_song_ch1, original_song_ch2)).T)\n",
584 | "    audio_arr = np.hstack(np.array((audio_arr_ch1, audio_arr_ch2)).T)\n",
585 | "    enc_song = np.hstack(np.array((enc_song_ch1, enc_song_ch2)).T)\n",
586 | "    w = np.linspace(0, sample_rate, len(audio_arr))\n",
587 | "    w = w[0:len(audio_arr)]\n",
588 | "\n",
589 | "    plt.figure(1)\n",
590 | "    plt.plot(w, original_song)\n",
591 | "    plt.savefig(str(folder) + '/original.png')\n",
592 | "    plt.plot(w, audio_arr)\n",
593 | "    plt.xlabel('sample')\n",
594 | "    plt.ylabel('amplitude')\n",
595 | "    plt.savefig(str(folder) + '/compressed' + str(idty) + '.png')\n",
596 | "    #plt.clf()\n",
597 | "    plt.show()\n",
598 | "    cols = 2\n",
599 | "    rows = math.floor(len(audio_arr)/2)\n",
600 | "    audio_arr = audio_arr.reshape(rows, cols)\n",
601 | "    original_song = original_song.reshape(rows, cols)\n",
602 | "    enc_song = enc_song.reshape(math.floor(len(enc_song)/2), cols)\n",
603 | "    if dump:\n",
604 | "        from scipy.io import wavfile\n",
605 | "        print('aga')\n",
606 | "        wavfile.write('Reconstructedsongforcontin' + str(idty) + '.wav', sample_rate, audio_arr)\n",
607 | "        wavfile.write('original_songforcontin.wav', sample_rate, original_song)\n",
608 | "        pickle.dump(enc_song, open(\"enc_song.p\", \"wb\"))\n",
609 | "\n",
610 | "def next_batch(c_batch, batch_size, sess):\n",
611 | "    ch1_arr = []\n",
612 | "    ch2_arr = []\n",
613 | "    wav_arr_ch1, wav_arr_ch2, sample_rate = get_next_batch(c_batch, batch_size, sess)\n",
614 | "\n",
615 | "    for sub_arr in wav_arr_ch1:\n",
616 | "        batch_size_ch1 = math.floor(len(sub_arr)/inputs)\n",
617 | "        sub_arr = sub_arr[:(batch_size_ch1*inputs)]\n",
618 | "        ch1_arr.append(np.array(sub_arr).reshape(batch_size_ch1, inputs))\n",
619 | "\n",
620 | "    for sub_arr in wav_arr_ch2:\n",
621 | "        batch_size_ch2 = math.floor(len(sub_arr)/inputs)\n",
622 | "        sub_arr = sub_arr[:(batch_size_ch2*inputs)]\n",
623 | "        ch2_arr.append(np.array(sub_arr).reshape(batch_size_ch2, inputs))\n",
624 | "\n",
625 | "    return np.array(ch1_arr), np.array(ch2_arr), sample_rate\n",
626 | "\n",
627 | "#LOSS_OUT_FILE = 'Epoch_Loss.txt'\n",
628 | "process_wav()\n",
629 | "inputs = 10000\n",
630 | "# Learning rate\n",
631 | "lr = 0.00005\n",
632 | "# L2 regularization\n",
633 | "l2 = 0.00005\n",
634 | "hidden_1_size = 7000\n",
635 | "hidden_2_size = 4440\n",
636 | "hidden_3_size = 1800  # bottleneck width: each 10000-sample frame is encoded to 1800 floats\n",
637 | "epochs = 50000\n",
638 | "batch_size = 1\n",
639 | "batches = 1\n",
640 | "X = tf.placeholder(tf.float32, shape=[None, inputs])\n",
641 | "l2_regularizer = tf.contrib.layers.l2_regularizer(l2)\n",
642 | "#saver = tf.train.Saver()\n",
643 | "autoencoder_dnn = partial(tf.layers.dense, activation=tf.nn.elu, kernel_initializer=tf.contrib.layers.variance_scaling_initializer(), kernel_regularizer=l2_regularizer)\n",
644 | "hidden_1 = autoencoder_dnn(X, hidden_1_size)\n",
645 | "hidden_2 = 
autoencoder_dnn(hidden_1, hidden_2_size)\n",
646 | "hidden_3 = autoencoder_dnn(hidden_2, hidden_3_size)  # bottleneck: the compressed representation\n",
647 | "hidden_4 = autoencoder_dnn(hidden_3, hidden_2_size)\n",
648 | "outputs = autoencoder_dnn(hidden_4, inputs, activation=None)\n",
649 | "#encoder = autoencoder_dnn(inputs, hidden_3, activation=None)\n",
650 | "#decoder = autoencoder_dnn(hidden_3, inputs, activation=None)\n",
651 | "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))\n",
652 | "reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n",
653 | "loss = tf.add_n([reconstruction_loss] + reg_loss)\n",
654 | "optimizer = tf.train.AdamOptimizer(lr)\n",
655 | "training_op = optimizer.minimize(loss)\n",
656 | "init = tf.global_variables_initializer()\n",
657 | "saver = tf.train.Saver()\n",
658 | "\n",
659 | "##### Run training\n",
660 | "with tf.Session() as sess:\n",
661 | "    init.run()\n",
662 | "    saver.restore(sess, \"./model.ckpt\")  # resume from the saved checkpoint\n",
663 | "    ch1_song, ch2_song, sample_rate = next_batch(4, batch_size, sess)\n",
664 | "    for epoch in range(epochs):\n",
665 | "        epoch_loss = []\n",
666 | "        print(\"Epoch: \" + str(epoch))\n",
667 | "        for i in range(batches):\n",
668 | "            # print(\"ch1_song\",ch1_song)\n",
669 | "            total_songs = np.hstack([ch1_song, ch2_song])\n",
670 | "            print('totalsongs' + str(total_songs.shape))\n",
671 | "            batch_loss = []\n",
672 | "            for j in range(len(total_songs)):\n",
673 | "                x_batch = total_songs[j]\n",
674 | "                _, l = sess.run([training_op, loss], feed_dict={X: x_batch})\n",
675 | "                batch_loss.append(l)\n",
676 | "                print(\"Song loss: \" + str(l))\n",
677 | "\n",
678 | "            print(\"Curr Epoch: \" + str(epoch) + \" Curr Batch: \" + str(i) + \"/\" + str(batches))\n",
679 | "            print(\"Batch Loss: \" + str(np.mean(batch_loss)))\n",
680 | "            epoch_loss.append(np.mean(batch_loss))\n",
681 | "            print(\"Epoch Avg Loss: \" + str(np.mean(epoch_loss)))\n",
682 | "        if epoch == 50:\n",
683 | "            ch1_song_new, ch2_song_new, sample_rate_new = ch1_song, ch2_song, sample_rate\n",
684 | "            #ch1_song_new, ch2_song_new, sample_rate_new = next_batch(0, 1, sess)\n",
685 | "\n",
686 | "            # print(\"ch1\",ch1_song_new)\n",
687 | "            # print(\"ch2\",ch2_song_new)\n",
688 | "            x_batch = np.hstack([ch1_song_new, ch2_song_new])[0]\n",
689 | "            print(\"Sample rate: \" + str(sample_rate_new))\n",
690 | "\n",
691 | "            orig_song = []\n",
692 | "            full_song = []\n",
693 | "            encoded_song = []\n",
694 | "            encoded = hidden_3.eval(feed_dict={X: x_batch})  # bottleneck activations\n",
695 | "            print('encodedshape' + str(encoded.shape))\n",
696 | "            evaluation = outputs.eval(feed_dict={X: x_batch})\n",
697 | "            print(\"Output: \" + str(evaluation))\n",
698 | "            print('evaluation' + str(evaluation.shape))\n",
699 | "            print('x_batch' + str(x_batch.shape))\n",
700 | "            encoded_song.append(encoded)\n",
701 | "            full_song.append(evaluation)\n",
702 | "            orig_song.append(x_batch)\n",
703 | "\n",
704 | "            # Merge the nested arrays\n",
705 | "            orig_song = np.hstack(orig_song)\n",
706 | "            full_song = np.hstack(full_song)\n",
707 | "            encoded_song = np.hstack(encoded_song)\n",
708 | "\n",
709 | "            # Compute and split the channels\n",
710 | "            orig_song_ch1 = orig_song[:math.floor(len(orig_song)/2)]\n",
711 | "            orig_song_ch2 = orig_song[math.floor(len(orig_song)/2):]\n",
712 | "            full_song_ch1 = full_song[:math.floor(len(full_song)/2)]\n",
713 | "            full_song_ch2 = full_song[math.floor(len(full_song)/2):]\n",
714 | "            enc_song_ch1 = encoded_song[:math.floor(len(encoded_song)/2)]\n",
715 | "            enc_song_ch2 = encoded_song[math.floor(len(encoded_song)/2):]\n",
716 | "\n",
717 | "            # Save both the 
untouched song and the reconstructed song to the 'output' folder\n",
718 | "            save_to_wav(enc_song_ch1, enc_song_ch2, full_song_ch1, full_song_ch2, sample_rate, orig_song_ch1, orig_song_ch2, epoch, 'output', sess, True)\n",
719 | "            #saver.save(sess, './model.ckpt')"
720 | ]
721 | }
722 | ],
723 | "metadata": {
724 | "kernelspec": {
725 | "display_name": "Python 3",
726 | "language": "python",
727 | "name": "python3"
728 | },
729 | "language_info": {
730 | "codemirror_mode": {
731 | "name": "ipython",
732 | "version": 3
733 | },
734 | "file_extension": ".py",
735 | "mimetype": "text/x-python",
736 | "name": "python",
737 | "nbconvert_exporter": "python",
738 | "pygments_lexer": "ipython3",
739 | "version": "3.7.3"
740 | }
741 | },
742 | "nbformat": 4,
743 | "nbformat_minor": 2
744 | }
745 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AudioCompression using AutoEncoder
2 | # Machine learning is replacing older techniques in almost every field,
3 | # so I thought: why not try machine learning to compress audio?
4 | 
5 | The number of nodes in each layer can be tweaked; you can compress audio up to 23 times or even more (I haven't pushed it that far myself, but you certainly can)!
6 | 
7 | Your training dataset for the autoencoder model needs to be .wav files, since the libraries used in the code do not handle other formats well.
8 | 
9 | # Note that this compression is meant for data transfer and transmission, not storage: the encoder produces a pickle file that you decode on the receiving end (both steps are included in the code).
10 | 
11 | Code for both encoding and decoding is available in the Jupyter notebook. Happy compressing!
12 | 
13 | If you have trouble getting a dataset, feel free to reach out to me at waheed@pnw.edu.
14 | 
15 | This project was supervised by Professor Orhan Arikan, Department Chair of Bilkent University's EEE department.
--------------------------------------------------------------------------------
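# Where the compression ratio comes from (a quick sketch)

The ratio falls directly out of the layer sizes hard-coded in the notebook: each 10000-sample frame (`inputs = 10000`) is encoded down to 1800 floats (the bottleneck size `hidden_3_size = 1800`). Below is a minimal back-of-the-envelope; the 435-unit bottleneck needed for ~23x is a hypothetical value, not something tested here:

```python
inputs = 10000       # samples per frame fed to the autoencoder
bottleneck = 1800    # bottleneck width as shipped in the notebook

print(inputs / bottleneck)  # ~5.56x compression per frame

# Reaching the ~23x mentioned above means shrinking the bottleneck:
hypothetical_bottleneck = 435  # untested: 10000 / 435 is roughly 23
print(inputs / hypothetical_bottleneck)
```

Note this counts floats in versus floats out; the bytes actually shipped also depend on the dtype you pickle (float32 vs. float64).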
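# Decoding on the receiving end (a minimal sketch)

As written, the notebook pickles a post-processed copy of the encoding (`save_to_wav` runs `irfft` and a reshape before dumping `enc_song.p`). For transmission you would more likely pickle the raw bottleneck activations (the `(1062, 1800)` array printed as `encodedshape` above) and ship the `model.ckpt` weights to the receiver once. Assuming exactly that, the decode side could look like the following sketch; it rebuilds the training graph in the same order so `tf.layers.dense` assigns the same variable names the checkpoint was saved under, then feeds the encoding straight into the bottleneck tensor:

```python
import pickle
from functools import partial
import tensorflow as tf

inputs = 10000
hidden_1_size, hidden_2_size, hidden_3_size = 7000, 4440, 1800

# Same layers, same creation order, so variable names match the checkpoint.
dense = partial(tf.layers.dense, activation=tf.nn.elu,
                kernel_initializer=tf.contrib.layers.variance_scaling_initializer())
X = tf.placeholder(tf.float32, shape=[None, inputs])
hidden_1 = dense(X, hidden_1_size)
hidden_2 = dense(hidden_1, hidden_2_size)
hidden_3 = dense(hidden_2, hidden_3_size)   # bottleneck
hidden_4 = dense(hidden_3, hidden_2_size)
outputs = dense(hidden_4, inputs, activation=None)

saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, "./model.ckpt")
    with open("enc_song.p", "rb") as f:
        encoded = pickle.load(f)  # assumed raw bottleneck, shape (n_frames, 1800)
    # TF1 lets you feed any tensor, so inject the encoding at the bottleneck
    # and run only the decoder half of the graph:
    frames = sess.run(outputs, feed_dict={hidden_3: encoded})
    # 'frames' holds reconstructed rfft frames of shape (n_frames, 10000);
    # undo the rfft and re-interleave the channels as save_to_wav() does.
```

Feeding `hidden_3` directly is the point of the scheme: the receiver never needs the original audio frames, only the 1800-float encodings plus a one-time copy of the shared weights.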