├── AudioClassification.ipynb └── results.csv /AudioClassification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1. Import and Install Dependencies" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## 1.1 Install Dependencies" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "jupyter": { 22 | "source_hidden": true 23 | }, 24 | "tags": [] 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 tensorflow-io matplotlib" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## 1.2 Load Dependencies" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "jupyter": { 43 | "source_hidden": true 44 | }, 45 | "tags": [] 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "import os\n", 50 | "from matplotlib import pyplot as plt\n", 51 | "import tensorflow as tf \n", 52 | "import tensorflow_io as tfio" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "# 2. Build Data Loading Function" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## 2.1 Define Paths to Files" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "jupyter": { 74 | "source_hidden": true 75 | }, 76 | "tags": [] 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "CAPUCHIN_FILE = os.path.join('data', 'Parsed_Capuchinbird_Clips', 'XC3776-3.wav')\n", 81 | "NOT_CAPUCHIN_FILE = os.path.join('data', 'Parsed_Not_Capuchinbird_Clips', 'afternoon-birds-song-in-forest-0.wav')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## 2.2 Build Data Loading Function" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "jupyter": { 96 | "source_hidden": true 97 | }, 98 | "tags": [] 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "def load_wav_16k_mono(filename):\n", 103 | "    # Load encoded wav file\n", 104 | "    file_contents = tf.io.read_file(filename)\n", 105 | "    # Decode wav into a float tensor (one column per channel) \n", 106 | "    wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)\n", 107 | "    # Remove the trailing channel axis\n", 108 | "    wav = tf.squeeze(wav, axis=-1)\n", 109 | "    sample_rate = tf.cast(sample_rate, dtype=tf.int64)\n", 110 | "    # Resample from the file's native rate (often 44100 Hz) down to 16000 Hz\n", 111 | "    wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)\n", 112 | "    return wav" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## 2.3 Plot Wave" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "jupyter": { 127 | "source_hidden": true 128 | }, 129 | "tags": [] 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "wave = load_wav_16k_mono(CAPUCHIN_FILE)\n", 134 | "nwave = load_wav_16k_mono(NOT_CAPUCHIN_FILE)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "jupyter": { 142 | "source_hidden": true 143 | }, 144 | "tags": [] 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "plt.plot(wave)\n", 149 | "plt.plot(nwave)\n", 150 | "plt.show()" 151 | ] 152 | },
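{ "cell_type": "markdown", "metadata": {}, "source": [ "## 2.4 Check Clip Durations\n", "\n", "A quick sanity check on the loader: after the resample above every clip is mono 16 kHz audio, so dividing the sample count by 16000 gives the duration in seconds." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "outputs": [], "source": [ "# Both tensors are 1-D mono waveforms at 16 kHz after load_wav_16k_mono\n", "print('Capuchin clip:', len(wave) / 16000, 'seconds')\n", "print('Not-capuchin clip:', len(nwave) / 16000, 'seconds')" ] },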
{ 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "# 3. Create Tensorflow Dataset" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "## 3.1 Define Paths to Positive and Negative Data" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "jupyter": { 172 | "source_hidden": true 173 | }, 174 | "tags": [] 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "POS = os.path.join('data', 'Parsed_Capuchinbird_Clips')\n", 179 | "NEG = os.path.join('data', 'Parsed_Not_Capuchinbird_Clips')" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "## 3.2 Create Tensorflow Datasets" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "jupyter": { 194 | "source_hidden": true 195 | }, 196 | "tags": [] 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "# Use os.path.join for the glob pattern so this works on any OS, not just Windows\n", "pos = tf.data.Dataset.list_files(os.path.join(POS, '*.wav'))\n", 201 | "neg = tf.data.Dataset.list_files(os.path.join(NEG, '*.wav'))" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "## 3.3 Add Labels and Combine Positive and Negative Samples" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "jupyter": { 216 | "source_hidden": true 217 | }, 218 | "tags": [] 219 | }, 220 | "outputs": [], 221 | "source": [ 222 | "positives = tf.data.Dataset.zip((pos, tf.data.Dataset.from_tensor_slices(tf.ones(len(pos)))))\n", 223 | "negatives = tf.data.Dataset.zip((neg, tf.data.Dataset.from_tensor_slices(tf.zeros(len(neg)))))\n", 224 | "data = positives.concatenate(negatives)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "# 4. Determine Average Length of a Capuchin Call" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "## 4.1 Calculate Clip Lengths" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "jupyter": { 246 | "source_hidden": true 247 | }, 248 | "tags": [] 249 | }, 250 | "outputs": [], 251 | "source": [ 252 | "lengths = []\n", 253 | "for file in os.listdir(os.path.join('data', 'Parsed_Capuchinbird_Clips')):\n", 254 | "    tensor_wave = load_wav_16k_mono(os.path.join('data', 'Parsed_Capuchinbird_Clips', file))\n", 255 | "    lengths.append(len(tensor_wave))" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "## 4.2 Calculate Mean, Min and Max" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": { 269 | "jupyter": { 270 | "source_hidden": true 271 | }, 272 | "tags": [] 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "tf.math.reduce_mean(lengths)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": { 283 | "jupyter": { 284 | "source_hidden": true 285 | }, 286 | "tags": [] 287 | }, 288 | "outputs": [], 289 | "source": [ 290 | "tf.math.reduce_min(lengths)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": { 297 | "jupyter": { 298 | "source_hidden": true 299 | }, 300 | "tags": [] 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "tf.math.reduce_max(lengths)" 305 | ] 306 | },
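{ "cell_type": "markdown", "metadata": {}, "source": [ "## 4.3 Convert Lengths to Seconds\n", "\n", "The statistics above are raw sample counts at 16 kHz. Converting them to seconds makes them easier to read, and is one way to sanity-check the fixed 48000-sample (3 second) window used in the next section." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "outputs": [], "source": [ "# lengths holds per-clip sample counts at 16 kHz; divide by 16000 for seconds\n", "avg_s = sum(lengths) / len(lengths) / 16000\n", "min_s = min(lengths) / 16000\n", "max_s = max(lengths) / 16000\n", "avg_s, min_s, max_s" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 5. 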
Build Preprocessing Function to Convert to Spectrogram" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "## 5.1 Build Preprocessing Function" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": { 325 | "jupyter": { 326 | "source_hidden": true 327 | }, 328 | "tags": [] 329 | }, 330 | "outputs": [], 331 | "source": [ 332 | "def preprocess(file_path, label): \n", 333 | "    # Load the clip and truncate it to at most 48000 samples (3 s at 16 kHz)\n", "    wav = load_wav_16k_mono(file_path)\n", 334 | "    wav = wav[:48000]\n", 335 | "    # Pad shorter clips with leading zeros up to exactly 48000 samples\n", "    zero_padding = tf.zeros([48000] - tf.shape(wav), dtype=tf.float32)\n", 336 | "    wav = tf.concat([zero_padding, wav],0)\n", 337 | "    # Short-time Fourier transform; keep the magnitude only\n", "    spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)\n", 338 | "    spectrogram = tf.abs(spectrogram)\n", 339 | "    # Add a channel dimension so the CNN sees a single-channel image\n", "    spectrogram = tf.expand_dims(spectrogram, axis=2)\n", 340 | "    return spectrogram, label" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "## 5.2 Test Out the Function and Visualize the Spectrogram" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": { 354 | "jupyter": { 355 | "source_hidden": true 356 | }, 357 | "tags": [] 358 | }, 359 | "outputs": [], 360 | "source": [ 361 | "filepath, label = positives.shuffle(buffer_size=10000).as_numpy_iterator().next()" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": { 368 | "jupyter": { 369 | "source_hidden": true 370 | }, 371 | "tags": [] 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "spectrogram, label = preprocess(filepath, label)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": { 382 | "jupyter": { 383 | "source_hidden": true 384 | }, 385 | "tags": [] 386 | }, 387 | "outputs": [], 388 | "source": [ 389 | "plt.figure(figsize=(30,20))\n", 390 | "plt.imshow(tf.transpose(spectrogram)[0])\n", 391 | "plt.show()" 392 | ] 393 | },
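{ "cell_type": "markdown", "metadata": {}, "source": [ "## 5.3 Check the Spectrogram Shape\n", "\n", "The CNN in Section 7 hard-codes `input_shape=(1491, 257, 1)`. Those numbers follow from the STFT settings above: `1 + (48000 - 320) // 32 = 1491` frames, and `tf.signal.stft` pads the FFT length up to the next power of two enclosing the frame length (512), giving `512 // 2 + 1 = 257` frequency bins." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "outputs": [], "source": [ "# Verify the hard-coded model input shape against the STFT arithmetic\n", "print(spectrogram.shape)        # (1491, 257, 1)\n", "print(1 + (48000 - 320) // 32)  # frames\n", "print(512 // 2 + 1)             # bins; default fft_length is the next power of two above 320" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 6. 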
Create Training and Testing Partitions" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": {}, 404 | "source": [ 405 | "## 6.1 Create a Tensorflow Data Pipeline" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": { 412 | "jupyter": { 413 | "source_hidden": true 414 | }, 415 | "tags": [] 416 | }, 417 | "outputs": [], 418 | "source": [ 419 | "# Map clips to (spectrogram, label), cache, shuffle, batch 16 at a time and prefetch\n", "data = data.map(preprocess)\n", 420 | "data = data.cache()\n", 421 | "data = data.shuffle(buffer_size=1000)\n", 422 | "data = data.batch(16)\n", 423 | "data = data.prefetch(8)" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "## 6.2 Split into Training and Testing Partitions" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": { 437 | "jupyter": { 438 | "source_hidden": true 439 | }, 440 | "tags": [] 441 | }, 442 | "outputs": [], 443 | "source": [ 444 | "# Roughly a 70/30 split over the available batches (see the count check in 6.4)\n", "train = data.take(36)\n", 445 | "test = data.skip(36).take(15)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "## 6.3 Test One Batch" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": null, 458 | "metadata": { 459 | "jupyter": { 460 | "source_hidden": true 461 | }, 462 | "tags": [] 463 | }, 464 | "outputs": [], 465 | "source": [ 466 | "samples, labels = train.as_numpy_iterator().next()" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": { 473 | "jupyter": { 474 | "source_hidden": true 475 | }, 476 | "tags": [] 477 | }, 478 | "outputs": [], 479 | "source": [ 480 | "samples.shape" 481 | ] 482 | },
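{ "cell_type": "markdown", "metadata": {}, "source": [ "## 6.4 Check the Batch Counts\n", "\n", "The 36 train / 15 test batches above are hard-coded, so it is worth confirming they cover the dataset: at 16 examples per batch, 51 batches corresponds to roughly 800 parsed clips. If you swap in a different dataset, re-derive the split from `len(data)`." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "outputs": [], "source": [ "# len() works because the dataset cardinality is still known after batching\n", "total_batches = len(data)\n", "print('total batches:', total_batches)\n", "print('left over after 36 train + 15 test:', total_batches - 36 - 15)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 7. 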
Build Deep Learning Model" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "## 7.1 Load Tensorflow Dependencies" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": null, 500 | "metadata": { 501 | "jupyter": { 502 | "source_hidden": true 503 | }, 504 | "tags": [] 505 | }, 506 | "outputs": [], 507 | "source": [ 508 | "from tensorflow.keras.models import Sequential\n", 509 | "from tensorflow.keras.layers import Conv2D, Dense, Flatten" 510 | ] 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "metadata": {}, 515 | "source": [ 516 | "## 7.2 Build Sequential Model, Compile and View Summary" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": null, 522 | "metadata": { 523 | "jupyter": { 524 | "source_hidden": true 525 | }, 526 | "tags": [] 527 | }, 528 | "outputs": [], 529 | "source": [ 530 | "model = Sequential()\n", 531 | "model.add(Conv2D(16, (3,3), activation='relu', input_shape=(1491, 257,1)))\n", 532 | "model.add(Conv2D(16, (3,3), activation='relu'))\n", 533 | "model.add(Flatten())\n", 534 | "model.add(Dense(128, activation='relu'))\n", 535 | "model.add(Dense(1, activation='sigmoid'))" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": null, 541 | "metadata": { 542 | "jupyter": { 543 | "source_hidden": true 544 | }, 545 | "tags": [] 546 | }, 547 | "outputs": [], 548 | "source": [ 549 | "model.compile('Adam', loss='BinaryCrossentropy', metrics=[tf.keras.metrics.Recall(),tf.keras.metrics.Precision()])" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": null, 555 | "metadata": { 556 | "jupyter": { 557 | "source_hidden": true 558 | }, 559 | "tags": [] 560 | }, 561 | "outputs": [], 562 | "source": [ 563 | "model.summary()" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "## 7.3 Fit Model, View Loss and KPI Plots" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": null, 576 | "metadata": { 577 | "jupyter": { 578 | "source_hidden": true 579 | }, 580 | "tags": [] 581 | }, 582 | "outputs": [], 583 | "source": [ 584 | "hist = model.fit(train, epochs=4, validation_data=test)" 585 | ] 586 | }, 587 | { 588 | "cell_type": "code", 589 | "execution_count": null, 590 | "metadata": { 591 | "jupyter": { 592 | "source_hidden": true 593 | }, 594 | "tags": [] 595 | }, 596 | "outputs": [], 597 | "source": [ 598 | "plt.title('Loss')\n", 599 | "plt.plot(hist.history['loss'], 'r')\n", 600 | "plt.plot(hist.history['val_loss'], 'b')\n", 601 | "plt.show()" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": null, 607 | "metadata": { 608 | "jupyter": { 609 | "source_hidden": true 610 | }, 611 | "tags": [] 612 | }, 613 | "outputs": [], 614 | "source": [ 615 | "plt.title('Precision')\n", 616 | "plt.plot(hist.history['precision'], 'r')\n", 617 | "plt.plot(hist.history['val_precision'], 'b')\n", 618 | "plt.show()" 619 | ] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "execution_count": null, 624 | "metadata": { 625 | "jupyter": { 626 | "source_hidden": true 627 | }, 628 | "tags": [] 629 | }, 630 | "outputs": [], 631 | "source": [ 632 | "plt.title('Recall')\n", 633 | "plt.plot(hist.history['recall'], 'r')\n", 634 | "plt.plot(hist.history['val_recall'], 'b')\n", 635 | "plt.show()" 636 | ] 637 | }, 638 | { 639 | "cell_type": "markdown", 640 | "metadata": {}, 641 | "source": [ 642 | "# 8. 
Make a Prediction on a Single Clip" 643 | ] 644 | }, 645 | { 646 | "cell_type": "markdown", 647 | "metadata": {}, 648 | "source": [ 649 | "## 8.1 Get One Batch and Make a Prediction" 650 | ] 651 | }, 652 | { 653 | "cell_type": "code", 654 | "execution_count": null, 655 | "metadata": { 656 | "jupyter": { 657 | "source_hidden": true 658 | }, 659 | "tags": [] 660 | }, 661 | "outputs": [], 662 | "source": [ 663 | "X_test, y_test = test.as_numpy_iterator().next()" 664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": null, 669 | "metadata": { 670 | "jupyter": { 671 | "source_hidden": true 672 | }, 673 | "tags": [] 674 | }, 675 | "outputs": [], 676 | "source": [ 677 | "yhat = model.predict(X_test)" 678 | ] 679 | }, 680 | { 681 | "cell_type": "markdown", 682 | "metadata": {}, 683 | "source": [ 684 | "## 8.2 Convert Probabilities to Classes " 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": null, 690 | "metadata": { 691 | "jupyter": { 692 | "source_hidden": true 693 | }, 694 | "tags": [] 695 | }, 696 | "outputs": [], 697 | "source": [ 698 | "# The sigmoid output is a probability, so threshold it at 0.5\n", "yhat = [1 if prediction > 0.5 else 0 for prediction in yhat]" 699 | ] 700 | }, 701 | { 702 | "cell_type": "markdown", 703 | "metadata": { 704 | "tags": [] 705 | }, 706 | "source": [ 707 | "# 9. Build Forest Parsing Functions" 708 | ] 709 | }, 710 | { 711 | "cell_type": "markdown", 712 | "metadata": {}, 713 | "source": [ 714 | "## 9.1 Load up MP3s" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": null, 720 | "metadata": { 721 | "jupyter": { 722 | "source_hidden": true 723 | } 724 | }, 725 | "outputs": [], 726 | "source": [ 727 | "def load_mp3_16k_mono(filename):\n", 728 | "    \"\"\" Load an MP3 file, convert it to a float tensor, resample to 16 kHz single-channel audio. 
\"\"\"\n", 729 | " res = tfio.audio.AudioIOTensor(filename)\n", 730 | " # Convert to tensor and combine channels \n", 731 | " tensor = res.to_tensor()\n", 732 | " tensor = tf.math.reduce_sum(tensor, axis=1) / 2 \n", 733 | " # Extract sample rate and cast\n", 734 | " sample_rate = res.rate\n", 735 | " sample_rate = tf.cast(sample_rate, dtype=tf.int64)\n", 736 | " # Resample to 16 kHz\n", 737 | " wav = tfio.audio.resample(tensor, rate_in=sample_rate, rate_out=16000)\n", 738 | " return wav" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": null, 744 | "metadata": { 745 | "jupyter": { 746 | "source_hidden": true 747 | } 748 | }, 749 | "outputs": [], 750 | "source": [ 751 | "mp3 = os.path.join('data', 'Forest Recordings', 'recording_00.mp3')" 752 | ] 753 | }, 754 | { 755 | "cell_type": "code", 756 | "execution_count": null, 757 | "metadata": { 758 | "jupyter": { 759 | "source_hidden": true 760 | }, 761 | "tags": [] 762 | }, 763 | "outputs": [], 764 | "source": [ 765 | "wav = load_mp3_16k_mono(mp3)" 766 | ] 767 | }, 768 | { 769 | "cell_type": "code", 770 | "execution_count": null, 771 | "metadata": { 772 | "jupyter": { 773 | "source_hidden": true 774 | } 775 | }, 776 | "outputs": [], 777 | "source": [ 778 | "audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": null, 784 | "metadata": { 785 | "jupyter": { 786 | "source_hidden": true 787 | } 788 | }, 789 | "outputs": [], 790 | "source": [ 791 | "samples, index = audio_slices.as_numpy_iterator().next()" 792 | ] 793 | }, 794 | { 795 | "cell_type": "markdown", 796 | "metadata": {}, 797 | "source": [ 798 | "## 9.2 Build Function to Convert Clips into Windowed Spectrograms" 799 | ] 800 | }, 801 | { 802 | "cell_type": "code", 803 | "execution_count": null, 804 | "metadata": { 805 | "jupyter": { 806 | "source_hidden": true 807 | } 808 | }, 809 | "outputs": [], 810 | "source": [ 811 | "def preprocess_mp3(sample, index):\n", 812 | " sample = sample[0]\n", 813 | " zero_padding = tf.zeros([48000] - tf.shape(sample), dtype=tf.float32)\n", 814 | " wav = tf.concat([zero_padding, sample],0)\n", 815 | " spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)\n", 816 | " spectrogram = tf.abs(spectrogram)\n", 817 | " spectrogram = tf.expand_dims(spectrogram, axis=2)\n", 818 | " return spectrogram" 819 | ] 820 | }, 821 | { 822 | "cell_type": "markdown", 823 | "metadata": {}, 824 | "source": [ 825 | "## 9.3 Convert Longer Clips into Windows and Make Predictions" 826 | ] 827 | }, 828 | { 829 | "cell_type": "code", 830 | "execution_count": null, 831 | "metadata": { 832 | "jupyter": { 833 | "source_hidden": true 834 | } 835 | }, 836 | "outputs": [], 837 | "source": [ 838 | "audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=16000, sequence_stride=16000, batch_size=1)\n", 839 | "audio_slices = audio_slices.map(preprocess_mp3)\n", 840 | "audio_slices = audio_slices.batch(64)" 841 | ] 842 | }, 843 | { 844 | "cell_type": "code", 845 | "execution_count": null, 846 | "metadata": { 847 | "jupyter": { 848 | "source_hidden": true 849 | } 850 | }, 851 | "outputs": [], 852 | "source": [ 853 | "yhat = model.predict(audio_slices)\n", 854 | "yhat = [1 if prediction > 0.5 else 0 for prediction in yhat]" 855 | ] 856 | }, 857 | { 858 | "cell_type": "markdown", 859 | "metadata": {}, 860 | "source": [ 861 | "## 9.4 Group Consecutive Detections" 862 | ] 863 | }, 864 
{ 865 | "cell_type": "code", 866 | "execution_count": null, 867 | "metadata": { 868 | "jupyter": { 869 | "source_hidden": true 870 | } 871 | }, 872 | "outputs": [], 873 | "source": [ 874 | "from itertools import groupby" 875 | ] 876 | }, 877 | { 878 | "cell_type": "code", 879 | "execution_count": null, 880 | "metadata": { 881 | "jupyter": { 882 | "source_hidden": true 883 | } 884 | }, 885 | "outputs": [], 886 | "source": [ 887 | "yhat = [key for key, group in groupby(yhat)]\n", 888 | "calls = tf.math.reduce_sum(yhat).numpy()" 889 | ] 890 | }, 891 | { 892 | "cell_type": "code", 893 | "execution_count": null, 894 | "metadata": { 895 | "jupyter": { 896 | "source_hidden": true 897 | } 898 | }, 899 | "outputs": [], 900 | "source": [ 901 | "calls" 902 | ] 903 | }, 904 | { 905 | "cell_type": "markdown", 906 | "metadata": {}, 907 | "source": [ 908 | "# 10. Make Predictions" 909 | ] 910 | }, 911 | { 912 | "cell_type": "markdown", 913 | "metadata": {}, 914 | "source": [ 915 | "## 10.1 Loop Over All Recordings and Make Predictions" 916 | ] 917 | }, 918 | { 919 | "cell_type": "code", 920 | "execution_count": null, 921 | "metadata": { 922 | "jupyter": { 923 | "source_hidden": true 924 | } 925 | }, 926 | "outputs": [], 927 | "source": [ 928 | "results = {}\n", 929 | "for file in os.listdir(os.path.join('data', 'Forest Recordings')):\n", 930 | "    FILEPATH = os.path.join('data','Forest Recordings', file)\n", 931 | "    \n", 932 | "    wav = load_mp3_16k_mono(FILEPATH)\n", 933 | "    audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)\n", 934 | "    audio_slices = audio_slices.map(preprocess_mp3)\n", 935 | "    audio_slices = audio_slices.batch(64)\n", 936 | "    \n", 937 | "    yhat = model.predict(audio_slices)\n", 938 | "    \n", 939 | "    results[file] = yhat" 940 | ] 941 | }, 942 | { 943 | "cell_type": "code", 944 | "execution_count": null, 945 | "metadata": { 946 | "jupyter": { 947 | "source_hidden": true 948 | }, 949 | "scrolled": true, 950 | "tags": [] 951 | }, 952 | "outputs": [], 953 | "source": [ 954 | "results" 955 | ] 956 | }, 957 | { 958 | "cell_type": "markdown", 959 | "metadata": {}, 960 | "source": [ 961 | "## 10.2 Convert Predictions into Classes" 962 | ] 963 | }, 964 | { 965 | "cell_type": "code", 966 | "execution_count": null, 967 | "metadata": { 968 | "jupyter": { 969 | "source_hidden": true 970 | }, 971 | "scrolled": true, 972 | "tags": [] 973 | }, 974 | "outputs": [], 975 | "source": [ 976 | "class_preds = {}\n", 977 | "# model.predict returns sigmoid probabilities; keep only high-confidence (> 0.99) windows\n", "for file, probs in results.items():\n", 978 | "    class_preds[file] = [1 if prediction > 0.99 else 0 for prediction in probs]\n", 979 | "class_preds" 980 | ] 981 | }, 982 | { 983 | "cell_type": "markdown", 984 | "metadata": {}, 985 | "source": [ 986 | "## 10.3 Group Consecutive Detections" 987 | ] 988 | }, 989 | { 990 | "cell_type": "code", 991 | "execution_count": null, 992 | "metadata": { 993 | "jupyter": { 994 | "source_hidden": true 995 | }, 996 | "scrolled": true, 997 | "tags": [] 998 | }, 999 | "outputs": [], 1000 | "source": [ 1001 | "postprocessed = {}\n", 1002 | "for file, scores in class_preds.items():\n", 1003 | "    postprocessed[file] = tf.math.reduce_sum([key for key, group in groupby(scores)]).numpy()\n", 1004 | "postprocessed" 1005 | ] 1006 | }, 1007 | { 1008 | "cell_type": "markdown", 1009 | "metadata": {}, 1010 | "source": [ 1011 | "# 11. 
Export Results" 1012 | ] 1013 | }, 1014 | { 1015 | "cell_type": "code", 1016 | "execution_count": null, 1017 | "metadata": { 1018 | "jupyter": { 1019 | "source_hidden": true 1020 | } 1021 | }, 1022 | "outputs": [], 1023 | "source": [ 1024 | "import csv" 1025 | ] 1026 | }, 1027 | { 1028 | "cell_type": "code", 1029 | "execution_count": null, 1030 | "metadata": { 1031 | "jupyter": { 1032 | "source_hidden": true 1033 | }, 1034 | "tags": [] 1035 | }, 1036 | "outputs": [], 1037 | "source": [ 1038 | "with open('results.csv', 'w', newline='') as f:\n", 1039 | " writer = csv.writer(f, delimiter=',')\n", 1040 | " writer.writerow(['recording', 'capuchin_calls'])\n", 1041 | " for key, value in postprocessed.items():\n", 1042 | " writer.writerow([key, value])" 1043 | ] 1044 | } 1045 | ], 1046 | "metadata": { 1047 | "kernelspec": { 1048 | "display_name": "audioc", 1049 | "language": "python", 1050 | "name": "audioc" 1051 | }, 1052 | "language_info": { 1053 | "codemirror_mode": { 1054 | "name": "ipython", 1055 | "version": 3 1056 | }, 1057 | "file_extension": ".py", 1058 | "mimetype": "text/x-python", 1059 | "name": "python", 1060 | "nbconvert_exporter": "python", 1061 | "pygments_lexer": "ipython3", 1062 | "version": "3.7.3" 1063 | } 1064 | }, 1065 | "nbformat": 4, 1066 | "nbformat_minor": 4 1067 | } 1068 | -------------------------------------------------------------------------------- /results.csv: -------------------------------------------------------------------------------- 1 | recording,capuchin_calls 2 | recording_00.mp3,5 3 | recording_01.mp3,0 4 | recording_02.mp3,0 5 | recording_03.mp3,0 6 | recording_04.mp3,4 7 | recording_05.mp3,0 8 | recording_06.mp3,5 9 | recording_07.mp3,2 10 | recording_08.mp3,23 11 | recording_09.mp3,0 12 | recording_10.mp3,5 13 | recording_11.mp3,10 14 | recording_12.mp3,0 15 | recording_13.mp3,0 16 | recording_14.mp3,0 17 | recording_15.mp3,1 18 | recording_16.mp3,10 19 | recording_17.mp3,3 20 | recording_18.mp3,0 21 | recording_19.mp3,0 22 | recording_20.mp3,0 23 | recording_21.mp3,0 24 | recording_22.mp3,2 25 | recording_23.mp3,10 26 | recording_24.mp3,0 27 | recording_25.mp3,7 28 | recording_26.mp3,2 29 | recording_27.mp3,0 30 | recording_28.mp3,4 31 | recording_29.mp3,0 32 | recording_30.mp3,3 33 | recording_31.mp3,1 34 | recording_32.mp3,2 35 | recording_33.mp3,0 36 | recording_34.mp3,4 37 | recording_35.mp3,0 38 | recording_36.mp3,0 39 | recording_37.mp3,3 40 | recording_38.mp3,1 41 | recording_39.mp3,14 42 | recording_40.mp3,1 43 | recording_41.mp3,0 44 | recording_42.mp3,0 45 | recording_43.mp3,5 46 | recording_44.mp3,1 47 | recording_45.mp3,3 48 | recording_46.mp3,8 49 | recording_47.mp3,7 50 | recording_48.mp3,4 51 | recording_49.mp3,0 52 | recording_50.mp3,0 53 | recording_51.mp3,3 54 | recording_52.mp3,0 55 | recording_53.mp3,0 56 | recording_54.mp3,1 57 | recording_55.mp3,0 58 | recording_56.mp3,9 59 | recording_57.mp3,4 60 | recording_58.mp3,0 61 | recording_59.mp3,5 62 | recording_60.mp3,5 63 | recording_61.mp3,14 64 | recording_62.mp3,0 65 | recording_63.mp3,10 66 | recording_64.mp3,2 67 | recording_65.mp3,3 68 | recording_66.mp3,0 69 | recording_67.mp3,0 70 | recording_68.mp3,1 71 | recording_69.mp3,1 72 | recording_70.mp3,0 73 | recording_71.mp3,11 74 | recording_72.mp3,4 75 | recording_73.mp3,0 76 | recording_74.mp3,0 77 | recording_75.mp3,1 78 | recording_76.mp3,0 79 | recording_77.mp3,3 80 | recording_78.mp3,14 81 | recording_79.mp3,0 82 | recording_80.mp3,1 83 | recording_81.mp3,2 84 | recording_82.mp3,0 85 | recording_83.mp3,0 86 | 
recording_84.mp3,9 87 | recording_85.mp3,0 88 | recording_86.mp3,12 89 | recording_87.mp3,24 90 | recording_88.mp3,0 91 | recording_89.mp3,1 92 | recording_90.mp3,0 93 | recording_91.mp3,0 94 | recording_92.mp3,10 95 | recording_93.mp3,5 96 | recording_94.mp3,1 97 | recording_95.mp3,4 98 | recording_96.mp3,1 99 | recording_97.mp3,1 100 | recording_98.mp3,23 101 | recording_99.mp3,5 102 | --------------------------------------------------------------------------------