├── AudioClassification.ipynb └── results.csv /AudioClassification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1. Import and Install Dependencies" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## 1.1 Install Dependencies" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "jupyter": { 22 | "source_hidden": true 23 | }, 24 | "tags": [] 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 tensorflow-io matplotlib" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## 1.2 Load Dependencies" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "jupyter": { 43 | "source_hidden": true 44 | }, 45 | "tags": [] 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "import os\n", 50 | "from matplotlib import pyplot as plt\n", 51 | "import tensorflow as tf \n", 52 | "import tensorflow_io as tfio" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "# 2. Build Data Loading Function" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## 2.1 Define Paths to Files" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "jupyter": { 74 | "source_hidden": true 75 | }, 76 | "tags": [] 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "CAPUCHIN_FILE = os.path.join('data', 'Parsed_Capuchinbird_Clips', 'XC3776-3.wav')\n", 81 | "NOT_CAPUCHIN_FILE = os.path.join('data', 'Parsed_Not_Capuchinbird_Clips', 'afternoon-birds-song-in-forest-0.wav')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## 2.2 Build Data Loading Function" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "jupyter": { 96 | "source_hidden": true 97 | }, 98 | "tags": [] 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "def load_wav_16k_mono(filename):\n", 103 | "    # Load encoded wav file\n", 104 | "    file_contents = tf.io.read_file(filename)\n", 105 | "    # Decode wav into a float tensor (one column per channel) \n", 106 | "    wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)\n", 107 | "    # Remove the trailing channel axis\n", 108 | "    wav = tf.squeeze(wav, axis=-1)\n", 109 | "    sample_rate = tf.cast(sample_rate, dtype=tf.int64)\n", 110 | "    # Resample from the file's native rate (often 44100 Hz) down to 16000 Hz\n", 111 | "    wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)\n", 112 | "    return wav" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## 2.3 Plot Wave" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "jupyter": { 127 | "source_hidden": true 128 | }, 129 | "tags": [] 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "wave = load_wav_16k_mono(CAPUCHIN_FILE)\n", 134 | "nwave = load_wav_16k_mono(NOT_CAPUCHIN_FILE)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "jupyter": { 142 | "source_hidden": true 143 | }, 144 | "tags": [] 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "plt.plot(wave)\n", 149 | "plt.plot(nwave)\n", 150 | "plt.show()" 151 | ] 152 | },
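{ "cell_type": "markdown", "metadata": {}, "source": [ "## 2.4 Check Clip Durations\n", "\n", "A quick sanity check on the loader: after the resample above every clip is mono 16 kHz audio, so dividing the sample count by 16000 gives the duration in seconds." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "outputs": [], "source": [ "# Both tensors are 1-D mono waveforms at 16 kHz after load_wav_16k_mono\n", "print('Capuchin clip:', len(wave) / 16000, 'seconds')\n", "print('Not-capuchin clip:', len(nwave) / 16000, 'seconds')" ] },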
{ 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "# 3. Create Tensorflow Dataset" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "## 3.1 Define Paths to Positive and Negative Data" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "jupyter": { 172 | "source_hidden": true 173 | }, 174 | "tags": [] 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "POS = os.path.join('data', 'Parsed_Capuchinbird_Clips')\n", 179 | "NEG = os.path.join('data', 'Parsed_Not_Capuchinbird_Clips')" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "## 3.2 Create Tensorflow Datasets" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "jupyter": { 194 | "source_hidden": true 195 | }, 196 | "tags": [] 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "# Use os.path.join for the glob pattern so this works on any OS, not just Windows\n", "pos = tf.data.Dataset.list_files(os.path.join(POS, '*.wav'))\n", 201 | "neg = tf.data.Dataset.list_files(os.path.join(NEG, '*.wav'))" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "## 3.3 Add Labels and Combine Positive and Negative Samples" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "jupyter": { 216 | "source_hidden": true 217 | }, 218 | "tags": [] 219 | }, 220 | "outputs": [], 221 | "source": [ 222 | "positives = tf.data.Dataset.zip((pos, tf.data.Dataset.from_tensor_slices(tf.ones(len(pos)))))\n", 223 | "negatives = tf.data.Dataset.zip((neg, tf.data.Dataset.from_tensor_slices(tf.zeros(len(neg)))))\n", 224 | "data = positives.concatenate(negatives)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "# 4. Determine Average Length of a Capuchin Call" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "## 4.1 Calculate Clip Lengths" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "jupyter": { 246 | "source_hidden": true 247 | }, 248 | "tags": [] 249 | }, 250 | "outputs": [], 251 | "source": [ 252 | "lengths = []\n", 253 | "for file in os.listdir(os.path.join('data', 'Parsed_Capuchinbird_Clips')):\n", 254 | "    tensor_wave = load_wav_16k_mono(os.path.join('data', 'Parsed_Capuchinbird_Clips', file))\n", 255 | "    lengths.append(len(tensor_wave))" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "## 4.2 Calculate Mean, Min and Max" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": { 269 | "jupyter": { 270 | "source_hidden": true 271 | }, 272 | "tags": [] 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "tf.math.reduce_mean(lengths)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": { 283 | "jupyter": { 284 | "source_hidden": true 285 | }, 286 | "tags": [] 287 | }, 288 | "outputs": [], 289 | "source": [ 290 | "tf.math.reduce_min(lengths)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": { 297 | "jupyter": { 298 | "source_hidden": true 299 | }, 300 | "tags": [] 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "tf.math.reduce_max(lengths)" 305 | ] 306 | },
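{ "cell_type": "markdown", "metadata": {}, "source": [ "## 4.3 Convert Lengths to Seconds\n", "\n", "The statistics above are raw sample counts at 16 kHz. Converting them to seconds makes them easier to read, and is one way to sanity-check the fixed 48000-sample (3 second) window used in the next section." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "outputs": [], "source": [ "# lengths holds per-clip sample counts at 16 kHz; divide by 16000 for seconds\n", "avg_s = sum(lengths) / len(lengths) / 16000\n", "min_s = min(lengths) / 16000\n", "max_s = max(lengths) / 16000\n", "avg_s, min_s, max_s" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 5. 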
Build Preprocessing Function to Convert to Spectrogram" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "## 5.1 Build Preprocessing Function" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": { 325 | "jupyter": { 326 | "source_hidden": true 327 | }, 328 | "tags": [] 329 | }, 330 | "outputs": [], 331 | "source": [ 332 | "def preprocess(file_path, label): \n", 333 | "    # Load the clip and truncate it to at most 48000 samples (3 s at 16 kHz)\n", "    wav = load_wav_16k_mono(file_path)\n", 334 | "    wav = wav[:48000]\n", 335 | "    # Pad shorter clips with leading zeros up to exactly 48000 samples\n", "    zero_padding = tf.zeros([48000] - tf.shape(wav), dtype=tf.float32)\n", 336 | "    wav = tf.concat([zero_padding, wav],0)\n", 337 | "    # Short-time Fourier transform; keep the magnitude only\n", "    spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)\n", 338 | "    spectrogram = tf.abs(spectrogram)\n", 339 | "    # Add a channel dimension so the CNN sees a single-channel image\n", "    spectrogram = tf.expand_dims(spectrogram, axis=2)\n", 340 | "    return spectrogram, label" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "## 5.2 Test Out the Function and Visualize the Spectrogram" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": { 354 | "jupyter": { 355 | "source_hidden": true 356 | }, 357 | "tags": [] 358 | }, 359 | "outputs": [], 360 | "source": [ 361 | "filepath, label = positives.shuffle(buffer_size=10000).as_numpy_iterator().next()" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": { 368 | "jupyter": { 369 | "source_hidden": true 370 | }, 371 | "tags": [] 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "spectrogram, label = preprocess(filepath, label)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": { 382 | "jupyter": { 383 | "source_hidden": true 384 | }, 385 | "tags": [] 386 | }, 387 | "outputs": [], 388 | "source": [ 389 | "plt.figure(figsize=(30,20))\n", 390 | "plt.imshow(tf.transpose(spectrogram)[0])\n", 391 | "plt.show()" 392 | ] 393 | },
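{ "cell_type": "markdown", "metadata": {}, "source": [ "## 5.3 Check the Spectrogram Shape\n", "\n", "The CNN in Section 7 hard-codes `input_shape=(1491, 257, 1)`. Those numbers follow from the STFT settings above: `1 + (48000 - 320) // 32 = 1491` frames, and `tf.signal.stft` pads the FFT length up to the next power of two enclosing the frame length (512), giving `512 // 2 + 1 = 257` frequency bins." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "outputs": [], "source": [ "# Verify the hard-coded model input shape against the STFT arithmetic\n", "print(spectrogram.shape)        # (1491, 257, 1)\n", "print(1 + (48000 - 320) // 32)  # frames\n", "print(512 // 2 + 1)             # bins; default fft_length is the next power of two above 320" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 6. 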
Create Training and Testing Partitions" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": {}, 404 | "source": [ 405 | "## 6.1 Create a Tensorflow Data Pipeline" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": { 412 | "jupyter": { 413 | "source_hidden": true 414 | }, 415 | "tags": [] 416 | }, 417 | "outputs": [], 418 | "source": [ 419 | "# Map clips to (spectrogram, label), cache, shuffle, batch 16 at a time and prefetch\n", "data = data.map(preprocess)\n", 420 | "data = data.cache()\n", 421 | "data = data.shuffle(buffer_size=1000)\n", 422 | "data = data.batch(16)\n", 423 | "data = data.prefetch(8)" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "## 6.2 Split into Training and Testing Partitions" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": { 437 | "jupyter": { 438 | "source_hidden": true 439 | }, 440 | "tags": [] 441 | }, 442 | "outputs": [], 443 | "source": [ 444 | "# Roughly a 70/30 split over the available batches (see the count check in 6.4)\n", "train = data.take(36)\n", 445 | "test = data.skip(36).take(15)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "## 6.3 Test One Batch" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": null, 458 | "metadata": { 459 | "jupyter": { 460 | "source_hidden": true 461 | }, 462 | "tags": [] 463 | }, 464 | "outputs": [], 465 | "source": [ 466 | "samples, labels = train.as_numpy_iterator().next()" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": { 473 | "jupyter": { 474 | "source_hidden": true 475 | }, 476 | "tags": [] 477 | }, 478 | "outputs": [], 479 | "source": [ 480 | "samples.shape" 481 | ] 482 | },
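{ "cell_type": "markdown", "metadata": {}, "source": [ "## 6.4 Check the Batch Counts\n", "\n", "The 36 train / 15 test batches above are hard-coded, so it is worth confirming they cover the dataset: at 16 examples per batch, 51 batches corresponds to roughly 800 parsed clips. If you swap in a different dataset, re-derive the split from `len(data)`." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "outputs": [], "source": [ "# len() works because the dataset cardinality is still known after batching\n", "total_batches = len(data)\n", "print('total batches:', total_batches)\n", "print('left over after 36 train + 15 test:', total_batches - 36 - 15)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 7. 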
Build Deep Learning Model" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "## 7.1 Load Tensorflow Dependencies" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": null, 500 | "metadata": { 501 | "jupyter": { 502 | "source_hidden": true 503 | }, 504 | "tags": [] 505 | }, 506 | "outputs": [], 507 | "source": [ 508 | "from tensorflow.keras.models import Sequential\n", 509 | "from tensorflow.keras.layers import Conv2D, Dense, Flatten" 510 | ] 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "metadata": {}, 515 | "source": [ 516 | "## 7.2 Build Sequential Model, Compile and View Summary" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": null, 522 | "metadata": { 523 | "jupyter": { 524 | "source_hidden": true 525 | }, 526 | "tags": [] 527 | }, 528 | "outputs": [], 529 | "source": [ 530 | "model = Sequential()\n", 531 | "model.add(Conv2D(16, (3,3), activation='relu', input_shape=(1491, 257,1)))\n", 532 | "model.add(Conv2D(16, (3,3), activation='relu'))\n", 533 | "model.add(Flatten())\n", 534 | "model.add(Dense(128, activation='relu'))\n", 535 | "model.add(Dense(1, activation='sigmoid'))" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": null, 541 | "metadata": { 542 | "jupyter": { 543 | "source_hidden": true 544 | }, 545 | "tags": [] 546 | }, 547 | "outputs": [], 548 | "source": [ 549 | "model.compile('Adam', loss='BinaryCrossentropy', metrics=[tf.keras.metrics.Recall(),tf.keras.metrics.Precision()])" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": null, 555 | "metadata": { 556 | "jupyter": { 557 | "source_hidden": true 558 | }, 559 | "tags": [] 560 | }, 561 | "outputs": [], 562 | "source": [ 563 | "model.summary()" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "## 7.3 Fit Model, View Loss and KPI Plots" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": null, 576 | "metadata": { 577 | "jupyter": { 578 | "source_hidden": true 579 | }, 580 | "tags": [] 581 | }, 582 | "outputs": [], 583 | "source": [ 584 | "hist = model.fit(train, epochs=4, validation_data=test)" 585 | ] 586 | }, 587 | { 588 | "cell_type": "code", 589 | "execution_count": null, 590 | "metadata": { 591 | "jupyter": { 592 | "source_hidden": true 593 | }, 594 | "tags": [] 595 | }, 596 | "outputs": [], 597 | "source": [ 598 | "plt.title('Loss')\n", 599 | "plt.plot(hist.history['loss'], 'r')\n", 600 | "plt.plot(hist.history['val_loss'], 'b')\n", 601 | "plt.show()" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": null, 607 | "metadata": { 608 | "jupyter": { 609 | "source_hidden": true 610 | }, 611 | "tags": [] 612 | }, 613 | "outputs": [], 614 | "source": [ 615 | "plt.title('Precision')\n", 616 | "plt.plot(hist.history['precision'], 'r')\n", 617 | "plt.plot(hist.history['val_precision'], 'b')\n", 618 | "plt.show()" 619 | ] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "execution_count": null, 624 | "metadata": { 625 | "jupyter": { 626 | "source_hidden": true 627 | }, 628 | "tags": [] 629 | }, 630 | "outputs": [], 631 | "source": [ 632 | "plt.title('Recall')\n", 633 | "plt.plot(hist.history['recall'], 'r')\n", 634 | "plt.plot(hist.history['val_recall'], 'b')\n", 635 | "plt.show()" 636 | ] 637 | }, 638 | { 639 | "cell_type": "markdown", 640 | "metadata": {}, 641 | "source": [ 642 | "# 8. 
Make a Prediction on a Single Clip" 643 | ] 644 | }, 645 | { 646 | "cell_type": "markdown", 647 | "metadata": {}, 648 | "source": [ 649 | "## 8.1 Get One Batch and Make a Prediction" 650 | ] 651 | }, 652 | { 653 | "cell_type": "code", 654 | "execution_count": null, 655 | "metadata": { 656 | "jupyter": { 657 | "source_hidden": true 658 | }, 659 | "tags": [] 660 | }, 661 | "outputs": [], 662 | "source": [ 663 | "X_test, y_test = test.as_numpy_iterator().next()" 664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": null, 669 | "metadata": { 670 | "jupyter": { 671 | "source_hidden": true 672 | }, 673 | "tags": [] 674 | }, 675 | "outputs": [], 676 | "source": [ 677 | "yhat = model.predict(X_test)" 678 | ] 679 | }, 680 | { 681 | "cell_type": "markdown", 682 | "metadata": {}, 683 | "source": [ 684 | "## 8.2 Convert Probabilities to Classes " 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": null, 690 | "metadata": { 691 | "jupyter": { 692 | "source_hidden": true 693 | }, 694 | "tags": [] 695 | }, 696 | "outputs": [], 697 | "source": [ 698 | "# The sigmoid output is a probability, so threshold it at 0.5\n", "yhat = [1 if prediction > 0.5 else 0 for prediction in yhat]" 699 | ] 700 | }, 701 | { 702 | "cell_type": "markdown", 703 | "metadata": { 704 | "tags": [] 705 | }, 706 | "source": [ 707 | "# 9. Build Forest Parsing Functions" 708 | ] 709 | }, 710 | { 711 | "cell_type": "markdown", 712 | "metadata": {}, 713 | "source": [ 714 | "## 9.1 Load up MP3s" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": null, 720 | "metadata": { 721 | "jupyter": { 722 | "source_hidden": true 723 | } 724 | }, 725 | "outputs": [], 726 | "source": [ 727 | "def load_mp3_16k_mono(filename):\n", 728 | "    \"\"\" Load an MP3 file, convert it to a float tensor, resample to 16 kHz single-channel audio. 
\"\"\"\n", 729 | " res = tfio.audio.AudioIOTensor(filename)\n", 730 | " # Convert to tensor and combine channels \n", 731 | " tensor = res.to_tensor()\n", 732 | " tensor = tf.math.reduce_sum(tensor, axis=1) / 2 \n", 733 | " # Extract sample rate and cast\n", 734 | " sample_rate = res.rate\n", 735 | " sample_rate = tf.cast(sample_rate, dtype=tf.int64)\n", 736 | " # Resample to 16 kHz\n", 737 | " wav = tfio.audio.resample(tensor, rate_in=sample_rate, rate_out=16000)\n", 738 | " return wav" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": null, 744 | "metadata": { 745 | "jupyter": { 746 | "source_hidden": true 747 | } 748 | }, 749 | "outputs": [], 750 | "source": [ 751 | "mp3 = os.path.join('data', 'Forest Recordings', 'recording_00.mp3')" 752 | ] 753 | }, 754 | { 755 | "cell_type": "code", 756 | "execution_count": null, 757 | "metadata": { 758 | "jupyter": { 759 | "source_hidden": true 760 | }, 761 | "tags": [] 762 | }, 763 | "outputs": [], 764 | "source": [ 765 | "wav = load_mp3_16k_mono(mp3)" 766 | ] 767 | }, 768 | { 769 | "cell_type": "code", 770 | "execution_count": null, 771 | "metadata": { 772 | "jupyter": { 773 | "source_hidden": true 774 | } 775 | }, 776 | "outputs": [], 777 | "source": [ 778 | "audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": null, 784 | "metadata": { 785 | "jupyter": { 786 | "source_hidden": true 787 | } 788 | }, 789 | "outputs": [], 790 | "source": [ 791 | "samples, index = audio_slices.as_numpy_iterator().next()" 792 | ] 793 | }, 794 | { 795 | "cell_type": "markdown", 796 | "metadata": {}, 797 | "source": [ 798 | "## 9.2 Build Function to Convert Clips into Windowed Spectrograms" 799 | ] 800 | }, 801 | { 802 | "cell_type": "code", 803 | "execution_count": null, 804 | "metadata": { 805 | "jupyter": { 806 | "source_hidden": true 807 | } 808 | }, 809 | "outputs": [], 810 | "source": [ 811 | "def preprocess_mp3(sample, index):\n", 812 | " sample = sample[0]\n", 813 | " zero_padding = tf.zeros([48000] - tf.shape(sample), dtype=tf.float32)\n", 814 | " wav = tf.concat([zero_padding, sample],0)\n", 815 | " spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)\n", 816 | " spectrogram = tf.abs(spectrogram)\n", 817 | " spectrogram = tf.expand_dims(spectrogram, axis=2)\n", 818 | " return spectrogram" 819 | ] 820 | }, 821 | { 822 | "cell_type": "markdown", 823 | "metadata": {}, 824 | "source": [ 825 | "## 9.3 Convert Longer Clips into Windows and Make Predictions" 826 | ] 827 | }, 828 | { 829 | "cell_type": "code", 830 | "execution_count": null, 831 | "metadata": { 832 | "jupyter": { 833 | "source_hidden": true 834 | } 835 | }, 836 | "outputs": [], 837 | "source": [ 838 | "audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=16000, sequence_stride=16000, batch_size=1)\n", 839 | "audio_slices = audio_slices.map(preprocess_mp3)\n", 840 | "audio_slices = audio_slices.batch(64)" 841 | ] 842 | }, 843 | { 844 | "cell_type": "code", 845 | "execution_count": null, 846 | "metadata": { 847 | "jupyter": { 848 | "source_hidden": true 849 | } 850 | }, 851 | "outputs": [], 852 | "source": [ 853 | "yhat = model.predict(audio_slices)\n", 854 | "yhat = [1 if prediction > 0.5 else 0 for prediction in yhat]" 855 | ] 856 | }, 857 | { 858 | "cell_type": "markdown", 859 | "metadata": {}, 860 | "source": [ 861 | "## 9.4 Group Consecutive Detections" 862 | ] 863 | }, 864 
{ 865 | "cell_type": "code", 866 | "execution_count": null, 867 | "metadata": { 868 | "jupyter": { 869 | "source_hidden": true 870 | } 871 | }, 872 | "outputs": [], 873 | "source": [ 874 | "from itertools import groupby" 875 | ] 876 | }, 877 | { 878 | "cell_type": "code", 879 | "execution_count": null, 880 | "metadata": { 881 | "jupyter": { 882 | "source_hidden": true 883 | } 884 | }, 885 | "outputs": [], 886 | "source": [ 887 | "yhat = [key for key, group in groupby(yhat)]\n", 888 | "calls = tf.math.reduce_sum(yhat).numpy()" 889 | ] 890 | }, 891 | { 892 | "cell_type": "code", 893 | "execution_count": null, 894 | "metadata": { 895 | "jupyter": { 896 | "source_hidden": true 897 | } 898 | }, 899 | "outputs": [], 900 | "source": [ 901 | "calls" 902 | ] 903 | }, 904 | { 905 | "cell_type": "markdown", 906 | "metadata": {}, 907 | "source": [ 908 | "# 10. Make Predictions" 909 | ] 910 | }, 911 | { 912 | "cell_type": "markdown", 913 | "metadata": {}, 914 | "source": [ 915 | "## 10.1 Loop Over All Recordings and Make Predictions" 916 | ] 917 | }, 918 | { 919 | "cell_type": "code", 920 | "execution_count": null, 921 | "metadata": { 922 | "jupyter": { 923 | "source_hidden": true 924 | } 925 | }, 926 | "outputs": [], 927 | "source": [ 928 | "results = {}\n", 929 | "for file in os.listdir(os.path.join('data', 'Forest Recordings')):\n", 930 | "    FILEPATH = os.path.join('data','Forest Recordings', file)\n", 931 | "    \n", 932 | "    wav = load_mp3_16k_mono(FILEPATH)\n", 933 | "    audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)\n", 934 | "    audio_slices = audio_slices.map(preprocess_mp3)\n", 935 | "    audio_slices = audio_slices.batch(64)\n", 936 | "    \n", 937 | "    yhat = model.predict(audio_slices)\n", 938 | "    \n", 939 | "    results[file] = yhat" 940 | ] 941 | }, 942 | { 943 | "cell_type": "code", 944 | "execution_count": null, 945 | "metadata": { 946 | "jupyter": { 947 | "source_hidden": true 948 | }, 949 | "scrolled": true, 950 | "tags": [] 951 | }, 952 | "outputs": [], 953 | "source": [ 954 | "results" 955 | ] 956 | }, 957 | { 958 | "cell_type": "markdown", 959 | "metadata": {}, 960 | "source": [ 961 | "## 10.2 Convert Predictions into Classes" 962 | ] 963 | }, 964 | { 965 | "cell_type": "code", 966 | "execution_count": null, 967 | "metadata": { 968 | "jupyter": { 969 | "source_hidden": true 970 | }, 971 | "scrolled": true, 972 | "tags": [] 973 | }, 974 | "outputs": [], 975 | "source": [ 976 | "class_preds = {}\n", 977 | "# model.predict returns sigmoid probabilities; keep only high-confidence (> 0.99) windows\n", "for file, probs in results.items():\n", 978 | "    class_preds[file] = [1 if prediction > 0.99 else 0 for prediction in probs]\n", 979 | "class_preds" 980 | ] 981 | }, 982 | { 983 | "cell_type": "markdown", 984 | "metadata": {}, 985 | "source": [ 986 | "## 10.3 Group Consecutive Detections" 987 | ] 988 | }, 989 | { 990 | "cell_type": "code", 991 | "execution_count": null, 992 | "metadata": { 993 | "jupyter": { 994 | "source_hidden": true 995 | }, 996 | "scrolled": true, 997 | "tags": [] 998 | }, 999 | "outputs": [], 1000 | "source": [ 1001 | "postprocessed = {}\n", 1002 | "for file, scores in class_preds.items():\n", 1003 | "    postprocessed[file] = tf.math.reduce_sum([key for key, group in groupby(scores)]).numpy()\n", 1004 | "postprocessed" 1005 | ] 1006 | }, 1007 | { 1008 | "cell_type": "markdown", 1009 | "metadata": {}, 1010 | "source": [ 1011 | "# 11. 
Export Results" 1012 | ] 1013 | }, 1014 | { 1015 | "cell_type": "code", 1016 | "execution_count": null, 1017 | "metadata": { 1018 | "jupyter": { 1019 | "source_hidden": true 1020 | } 1021 | }, 1022 | "outputs": [], 1023 | "source": [ 1024 | "import csv" 1025 | ] 1026 | }, 1027 | { 1028 | "cell_type": "code", 1029 | "execution_count": null, 1030 | "metadata": { 1031 | "jupyter": { 1032 | "source_hidden": true 1033 | }, 1034 | "tags": [] 1035 | }, 1036 | "outputs": [], 1037 | "source": [ 1038 | "with open('results.csv', 'w', newline='') as f:\n", 1039 | " writer = csv.writer(f, delimiter=',')\n", 1040 | " writer.writerow(['recording', 'capuchin_calls'])\n", 1041 | " for key, value in postprocessed.items():\n", 1042 | " writer.writerow([key, value])" 1043 | ] 1044 | } 1045 | ], 1046 | "metadata": { 1047 | "kernelspec": { 1048 | "display_name": "audioc", 1049 | "language": "python", 1050 | "name": "audioc" 1051 | }, 1052 | "language_info": { 1053 | "codemirror_mode": { 1054 | "name": "ipython", 1055 | "version": 3 1056 | }, 1057 | "file_extension": ".py", 1058 | "mimetype": "text/x-python", 1059 | "name": "python", 1060 | "nbconvert_exporter": "python", 1061 | "pygments_lexer": "ipython3", 1062 | "version": "3.7.3" 1063 | } 1064 | }, 1065 | "nbformat": 4, 1066 | "nbformat_minor": 4 1067 | } 1068 | -------------------------------------------------------------------------------- /results.csv: -------------------------------------------------------------------------------- 1 | recording,capuchin_calls 2 | recording_00.mp3,5 3 | recording_01.mp3,0 4 | recording_02.mp3,0 5 | recording_03.mp3,0 6 | recording_04.mp3,4 7 | recording_05.mp3,0 8 | recording_06.mp3,5 9 | recording_07.mp3,2 10 | recording_08.mp3,23 11 | recording_09.mp3,0 12 | recording_10.mp3,5 13 | recording_11.mp3,10 14 | recording_12.mp3,0 15 | recording_13.mp3,0 16 | recording_14.mp3,0 17 | recording_15.mp3,1 18 | recording_16.mp3,10 19 | recording_17.mp3,3 20 | recording_18.mp3,0 21 | recording_19.mp3,0 22 | recording_20.mp3,0 23 | recording_21.mp3,0 24 | recording_22.mp3,2 25 | recording_23.mp3,10 26 | recording_24.mp3,0 27 | recording_25.mp3,7 28 | recording_26.mp3,2 29 | recording_27.mp3,0 30 | recording_28.mp3,4 31 | recording_29.mp3,0 32 | recording_30.mp3,3 33 | recording_31.mp3,1 34 | recording_32.mp3,2 35 | recording_33.mp3,0 36 | recording_34.mp3,4 37 | recording_35.mp3,0 38 | recording_36.mp3,0 39 | recording_37.mp3,3 40 | recording_38.mp3,1 41 | recording_39.mp3,14 42 | recording_40.mp3,1 43 | recording_41.mp3,0 44 | recording_42.mp3,0 45 | recording_43.mp3,5 46 | recording_44.mp3,1 47 | recording_45.mp3,3 48 | recording_46.mp3,8 49 | recording_47.mp3,7 50 | recording_48.mp3,4 51 | recording_49.mp3,0 52 | recording_50.mp3,0 53 | recording_51.mp3,3 54 | recording_52.mp3,0 55 | recording_53.mp3,0 56 | recording_54.mp3,1 57 | recording_55.mp3,0 58 | recording_56.mp3,9 59 | recording_57.mp3,4 60 | recording_58.mp3,0 61 | recording_59.mp3,5 62 | recording_60.mp3,5 63 | recording_61.mp3,14 64 | recording_62.mp3,0 65 | recording_63.mp3,10 66 | recording_64.mp3,2 67 | recording_65.mp3,3 68 | recording_66.mp3,0 69 | recording_67.mp3,0 70 | recording_68.mp3,1 71 | recording_69.mp3,1 72 | recording_70.mp3,0 73 | recording_71.mp3,11 74 | recording_72.mp3,4 75 | recording_73.mp3,0 76 | recording_74.mp3,0 77 | recording_75.mp3,1 78 | recording_76.mp3,0 79 | recording_77.mp3,3 80 | recording_78.mp3,14 81 | recording_79.mp3,0 82 | recording_80.mp3,1 83 | recording_81.mp3,2 84 | recording_82.mp3,0 85 | recording_83.mp3,0 86 | 
recording_84.mp3,9 87 | recording_85.mp3,0 88 | recording_86.mp3,12 89 | recording_87.mp3,24 90 | recording_88.mp3,0 91 | recording_89.mp3,1 92 | recording_90.mp3,0 93 | recording_91.mp3,0 94 | recording_92.mp3,10 95 | recording_93.mp3,5 96 | recording_94.mp3,1 97 | recording_95.mp3,4 98 | recording_96.mp3,1 99 | recording_97.mp3,1 100 | recording_98.mp3,23 101 | recording_99.mp3,5 102 | --------------------------------------------------------------------------------