├── .gitignore
├── media
    ├── palm.png
    ├── results.png
    ├── palm_emnlp.png
    ├── print_results.png
    └── palm_vs_palm_dagger.png
├── logs
    ├── results.sh
    ├── zeroshot
    │   ├── CREMA-D.json
    │   ├── ESC50.json
    │   ├── RAVDESS.json
    │   ├── SESA.json
    │   ├── TUT2017.json
    │   ├── ESC50-FOLD1.json
    │   ├── ESC50-FOLD2.json
    │   ├── ESC50-FOLD3.json
    │   ├── ESC50-FOLD4.json
    │   ├── ESC50-FOLD5.json
    │   ├── NS-Instruments.json
    │   ├── TUT2017-FOLD2.json
    │   ├── TUT2017-FOLD3.json
    │   ├── UrbanSound8K.json
    │   ├── VocalSound.json
    │   ├── Beijing-Opera-FOLD2.json
    │   ├── Beijing-Opera-FOLD3.json
    │   ├── Beijing-Opera-FOLD4.json
    │   ├── Beijing-Opera.json
    │   ├── ESC50-Actions-FOLD1.json
    │   ├── ESC50-Actions-FOLD3.json
    │   ├── ESC50-Actions.json
    │   ├── GT-Music-Genre.json
    │   ├── TUT2017-FOLD1.json
    │   ├── TUT2017-FOLD4.json
    │   ├── UrbanSound8K-FOLD2.json
    │   ├── UrbanSound8K-FOLD3.json
    │   ├── UrbanSound8K-FOLD4.json
    │   ├── UrbanSound8K-FOLD6.json
    │   ├── UrbanSound8K-FOLD8.json
    │   ├── Beijing-Opera-FOLD1.json
    │   ├── Beijing-Opera-FOLD5.json
    │   ├── ESC50-Actions-FOLD2.json
    │   ├── ESC50-Actions-FOLD4.json
    │   ├── ESC50-Actions-FOLD5.json
    │   ├── UrbanSound8K-FOLD1.json
    │   ├── UrbanSound8K-FOLD10.json
    │   ├── UrbanSound8K-FOLD5.json
    │   ├── UrbanSound8K-FOLD7.json
    │   ├── UrbanSound8K-FOLD9.json
    │   ├── accuracy.json
    │   ├── f1_score.json
    │   ├── SESA-SEED0.log
    │   ├── ESC50-Actions-FOLD1-SEED0.log
    │   ├── ESC50-Actions-FOLD2-SEED0.log
    │   ├── ESC50-Actions-FOLD3-SEED0.log
    │   ├── ESC50-Actions-FOLD5-SEED0.log
    │   ├── Beijing-Opera-FOLD1-SEED0.log
    │   ├── Beijing-Opera-FOLD2-SEED0.log
    │   ├── Beijing-Opera-FOLD3-SEED0.log
    │   ├── Beijing-Opera-FOLD4-SEED0.log
    │   ├── Beijing-Opera-FOLD5-SEED0.log
    │   ├── UrbanSound8K-FOLD1-SEED0.log
    │   ├── UrbanSound8K-FOLD10-SEED0.log
    │   ├── UrbanSound8K-FOLD2-SEED0.log
    │   └── UrbanSound8K-FOLD5-SEED0.log
    ├── palm
    │   ├── ESC50-Actions-FOLD3.json
    │   ├── ESC50.json
    │   ├── ESC50-Actions.json
    │   ├── ESC50-FOLD1.json
    │   ├── SESA.json
    │   ├── Beijing-Opera-FOLD1.json
    │   ├── Beijing-Opera-FOLD2.json
    │   ├── Beijing-Opera-FOLD3.json
    │   ├── Beijing-Opera-FOLD4.json
    │   ├── CREMA-D.json
    │   ├── ESC50-FOLD2.json
    │   ├── ESC50-FOLD3.json
    │   ├── ESC50-FOLD4.json
    │   ├── ESC50-FOLD5.json
    │   ├── RAVDESS.json
    │   ├── TUT2017.json
    │   ├── VocalSound.json
    │   ├── Beijing-Opera.json
    │   ├── ESC50-Actions-FOLD1.json
    │   ├── ESC50-Actions-FOLD4.json
    │   ├── GT-Music-Genre.json
    │   ├── NS-Instruments.json
    │   ├── TUT2017-FOLD1.json
    │   ├── TUT2017-FOLD2.json
    │   ├── TUT2017-FOLD3.json
    │   ├── TUT2017-FOLD4.json
    │   ├── UrbanSound8K.json
    │   ├── ESC50-Actions-FOLD2.json
    │   ├── ESC50-Actions-FOLD5.json
    │   ├── UrbanSound8K-FOLD1.json
    │   ├── UrbanSound8K-FOLD10.json
    │   ├── UrbanSound8K-FOLD2.json
    │   ├── UrbanSound8K-FOLD3.json
    │   ├── UrbanSound8K-FOLD4.json
    │   ├── UrbanSound8K-FOLD5.json
    │   ├── UrbanSound8K-FOLD6.json
    │   ├── UrbanSound8K-FOLD7.json
    │   ├── UrbanSound8K-FOLD8.json
    │   ├── UrbanSound8K-FOLD9.json
    │   ├── Beijing-Opera-FOLD5.json
    │   ├── accuracy.json
    │   └── f1_score.json
    ├── cocoop
    │   ├── Beijing-Opera-FOLD2.json
    │   ├── ESC50.json
    │   ├── Beijing-Opera-FOLD1.json
    │   ├── Beijing-Opera-FOLD4.json
    │   ├── Beijing-Opera-FOLD5.json
    │   ├── ESC50-Actions-FOLD3.json
    │   ├── ESC50-Actions.json
    │   ├── ESC50-FOLD2.json
    │   ├── ESC50-FOLD3.json
    │   ├── ESC50-FOLD4.json
    │   ├── ESC50-FOLD5.json
    │   ├── SESA.json
    │   ├── Beijing-Opera.json
    │   ├── CREMA-D.json
    │   ├── ESC50-Actions-FOLD5.json
    │   ├── ESC50-FOLD1.json
    │   ├── GT-Music-Genre.json
    │   ├── RAVDESS.json
    │   ├── TUT2017-FOLD1.json
    │   ├── TUT2017-FOLD3.json
    │   ├── TUT2017-FOLD4.json
    │   ├── TUT2017.json
    │   ├── UrbanSound8K.json
    │   ├── VocalSound.json
    │   ├── ESC50-Actions-FOLD1.json
    │   ├── ESC50-Actions-FOLD2.json
    │   ├── ESC50-Actions-FOLD4.json
    │   ├── NS-Instruments.json
    │   ├── TUT2017-FOLD2.json
    │   ├── UrbanSound8K-FOLD10.json
    │   ├── UrbanSound8K-FOLD9.json
    │   ├── Beijing-Opera-FOLD3.json
    │   ├── UrbanSound8K-FOLD1.json
    │   ├── UrbanSound8K-FOLD2.json
    │   ├── UrbanSound8K-FOLD3.json
    │   ├── UrbanSound8K-FOLD4.json
    │   ├── UrbanSound8K-FOLD5.json
    │   ├── UrbanSound8K-FOLD6.json
    │   ├── UrbanSound8K-FOLD7.json
    │   ├── UrbanSound8K-FOLD8.json
    │   ├── accuracy.json
    │   └── f1_score.json
    ├── coop
    │   ├── ESC50-Actions-FOLD3.json
    │   ├── ESC50-Actions-FOLD4.json
    │   ├── ESC50-FOLD4.json
    │   ├── ESC50.json
    │   ├── SESA.json
    │   ├── Beijing-Opera-FOLD4.json
    │   ├── CREMA-D.json
    │   ├── ESC50-Actions.json
    │   ├── ESC50-FOLD1.json
    │   ├── ESC50-FOLD2.json
    │   ├── ESC50-FOLD3.json
    │   ├── ESC50-FOLD5.json
    │   ├── RAVDESS.json
    │   ├── TUT2017.json
    │   ├── VocalSound.json
    │   ├── Beijing-Opera.json
    │   ├── ESC50-Actions-FOLD2.json
    │   ├── ESC50-Actions-FOLD5.json
    │   ├── GT-Music-Genre.json
    │   ├── TUT2017-FOLD1.json
    │   ├── TUT2017-FOLD2.json
    │   ├── TUT2017-FOLD3.json
    │   ├── TUT2017-FOLD4.json
    │   ├── UrbanSound8K.json
    │   ├── Beijing-Opera-FOLD1.json
    │   ├── Beijing-Opera-FOLD2.json
    │   ├── Beijing-Opera-FOLD3.json
    │   ├── ESC50-Actions-FOLD1.json
    │   ├── NS-Instruments.json
    │   ├── UrbanSound8K-FOLD1.json
    │   ├── UrbanSound8K-FOLD10.json
    │   ├── UrbanSound8K-FOLD2.json
    │   ├── UrbanSound8K-FOLD3.json
    │   ├── UrbanSound8K-FOLD4.json
    │   ├── UrbanSound8K-FOLD5.json
    │   ├── UrbanSound8K-FOLD6.json
    │   ├── UrbanSound8K-FOLD7.json
    │   ├── UrbanSound8K-FOLD8.json
    │   ├── UrbanSound8K-FOLD9.json
    │   ├── Beijing-Opera-FOLD5.json
    │   ├── accuracy.json
    │   └── f1_score.json
    ├── process_results.py
    └── print_results.py
├── pengi
    ├── models
    │   ├── __init__.py
    │   └── audio.py
    ├── __init__.py
    └── configs
    │   ├── base.yml
    │   └── base_no_text_enc.yml
├── palm
    ├── __init__.py
    ├── zeroshot.py
    └── palm.py
├── scripts
    ├── run_all_datasets_coop.sh
    ├── run_all_datasets_palm.sh
    ├── run_all_datasets_cocoop.sh
    ├── run_all_datasets_zeroshot.sh
    ├── run_all_datasets_all_methods.sh
    ├── sesa.sh
    ├── crema_d.sh
    ├── ravdess.sh
    ├── vocal_sound.sh
    ├── gt_music_genre.sh
    ├── ns_instruments.sh
    ├── esc50.sh
    ├── tut.sh
    ├── beijing_opera.sh
    ├── esc50_actions.sh
    └── urban_sound.sh
├── requirements.txt
├── LICENSE
├── main.py
└── utils
    ├── trainer.py
    └── dataset.py


/.gitignore:
--------------------------------------------------------------------------------
1 | **/__pycache__/
2 | pengi/configs/base.pth


--------------------------------------------------------------------------------
/media/palm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asif-hanif/palm/HEAD/media/palm.png


--------------------------------------------------------------------------------
/media/results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asif-hanif/palm/HEAD/media/results.png


--------------------------------------------------------------------------------
/media/palm_emnlp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asif-hanif/palm/HEAD/media/palm_emnlp.png


--------------------------------------------------------------------------------
/media/print_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asif-hanif/palm/HEAD/media/print_results.png


--------------------------------------------------------------------------------
/logs/results.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run the result post-processing steps one after another, in a fixed order.
# The script names are relative, so run this from the directory that holds them.
# NOTE(review): confirm process_folds.py is present next to process_results.py
# and print_results.py before relying on this script.
for step in process_folds process_results print_results; do
    python "${step}.py"
done


--------------------------------------------------------------------------------
/media/palm_vs_palm_dagger.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asif-hanif/palm/HEAD/media/palm_vs_palm_dagger.png


--------------------------------------------------------------------------------
/pengi/models/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | 
3 | # import models.audio
4 | # import models.config
5 | # import models.decoder
6 | # import models.pengi


--------------------------------------------------------------------------------
/palm/__init__.py:
--------------------------------------------------------------------------------
1 | from .zeroshot import ZeroShotPENGI as ZeroShot
2 | from .coop import CustomPENGI as COOP
3 | from .cocoop import CustomPENGI as COCOOP
4 | from .palm import CustomPENGI as PALM
5 | 
6 | 


--------------------------------------------------------------------------------
/logs/zeroshot/CREMA-D.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.231,
 4 |     "f1_score": 0.172,
 5 |     "precision": 0.2026,
 6 |     "recall": 0.2609,
 7 |     "avg_loss": 2.883,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.4965,
 4 |     "f1_score": 0.4478,
 5 |     "precision": 0.5376,
 6 |     "recall": 0.4965,
 7 |     "avg_loss": 1.8702,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/RAVDESS.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.1222,
 4 |     "f1_score": 0.0771,
 5 |     "precision": 0.2227,
 6 |     "recall": 0.1432,
 7 |     "avg_loss": 3.9791,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/SESA.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7238,
 4 |     "f1_score": 0.6827,
 5 |     "precision": 0.6941,
 6 |     "recall": 0.7508,
 7 |     "avg_loss": 1.3722,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/TUT2017.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.2435,
 4 |     "f1_score": 0.1795,
 5 |     "precision": 0.2958,
 6 |     "recall": 0.2434,
 7 |     "avg_loss": 3.5088,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.4975,
 4 |     "f1_score": 0.4464,
 5 |     "precision": 0.5487,
 6 |     "recall": 0.4975,
 7 |     "avg_loss": 1.7492,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.4825,
 4 |     "f1_score": 0.4288,
 5 |     "precision": 0.5242,
 6 |     "recall": 0.4825,
 7 |     "avg_loss": 2.0177,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.495,
 4 |     "f1_score": 0.4421,
 5 |     "precision": 0.5346,
 6 |     "recall": 0.495,
 7 |     "avg_loss": 1.8511,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.4925,
 4 |     "f1_score": 0.4422,
 5 |     "precision": 0.5054,
 6 |     "recall": 0.4925,
 7 |     "avg_loss": 1.7776,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.515,
 4 |     "f1_score": 0.4794,
 5 |     "precision": 0.575,
 6 |     "recall": 0.515,
 7 |     "avg_loss": 1.9556,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/NS-Instruments.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.3291,
 4 |     "f1_score": 0.26,
 5 |     "precision": 0.308,
 6 |     "recall": 0.2962,
 7 |     "avg_loss": 2.8539,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/TUT2017-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.2401,
 4 |     "f1_score": 0.174,
 5 |     "precision": 0.3072,
 6 |     "recall": 0.24,
 7 |     "avg_loss": 3.5171,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/TUT2017-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.2429,
 4 |     "f1_score": 0.1797,
 5 |     "precision": 0.326,
 6 |     "recall": 0.2428,
 7 |     "avg_loss": 3.431,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.5349,
 4 |     "f1_score": 0.5144,
 5 |     "precision": 0.5742,
 6 |     "recall": 0.5464,
 7 |     "avg_loss": 1.7085,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/VocalSound.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.4197,
 4 |     "f1_score": 0.3834,
 5 |     "precision": 0.4974,
 6 |     "recall": 0.4195,
 7 |     "avg_loss": 1.7859,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/Beijing-Opera-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.2766,
 4 |     "f1_score": 0.1083,
 5 |     "precision": 0.0691,
 6 |     "recall": 0.25,
 7 |     "avg_loss": 3.7753,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/Beijing-Opera-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.2766,
 4 |     "f1_score": 0.1083,
 5 |     "precision": 0.0691,
 6 |     "recall": 0.25,
 7 |     "avg_loss": 3.8906,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/Beijing-Opera-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.2766,
 4 |     "f1_score": 0.1083,
 5 |     "precision": 0.0691,
 6 |     "recall": 0.25,
 7 |     "avg_loss": 3.9215,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/Beijing-Opera.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.2881,
 4 |     "f1_score": 0.1321,
 5 |     "precision": 0.1697,
 6 |     "recall": 0.2633,
 7 |     "avg_loss": 3.8613,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-Actions-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7,
 4 |     "f1_score": 0.6518,
 5 |     "precision": 0.7361,
 6 |     "recall": 0.7,
 7 |     "avg_loss": 1.0603,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-Actions-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.675,
 4 |     "f1_score": 0.6251,
 5 |     "precision": 0.6599,
 6 |     "recall": 0.675,
 7 |     "avg_loss": 0.8852,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-Actions.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6525,
 4 |     "f1_score": 0.6138,
 5 |     "precision": 0.6874,
 6 |     "recall": 0.6525,
 7 |     "avg_loss": 1.0578,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/GT-Music-Genre.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.325,
 4 |     "f1_score": 0.2807,
 5 |     "precision": 0.3092,
 6 |     "recall": 0.3406,
 7 |     "avg_loss": 4.1713,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/TUT2017-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.2453,
 4 |     "f1_score": 0.1821,
 5 |     "precision": 0.2455,
 6 |     "recall": 0.2453,
 7 |     "avg_loss": 3.5917,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/TUT2017-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.2456,
 4 |     "f1_score": 0.1821,
 5 |     "precision": 0.3044,
 6 |     "recall": 0.2456,
 7 |     "avg_loss": 3.4954,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.545,
 4 |     "f1_score": 0.501,
 5 |     "precision": 0.5382,
 6 |     "recall": 0.5059,
 7 |     "avg_loss": 1.6292,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.467,
 4 |     "f1_score": 0.4633,
 5 |     "precision": 0.5381,
 6 |     "recall": 0.4956,
 7 |     "avg_loss": 2.0666,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.5677,
 4 |     "f1_score": 0.5618,
 5 |     "precision": 0.6388,
 6 |     "recall": 0.5754,
 7 |     "avg_loss": 1.345,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD6.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.5128,
 4 |     "f1_score": 0.502,
 5 |     "precision": 0.5619,
 6 |     "recall": 0.5428,
 7 |     "avg_loss": 1.9537,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD8.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.5261,
 4 |     "f1_score": 0.5193,
 5 |     "precision": 0.576,
 6 |     "recall": 0.5532,
 7 |     "avg_loss": 1.7248,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/Beijing-Opera-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.2917,
 4 |     "f1_score": 0.1468,
 5 |     "precision": 0.3191,
 6 |     "recall": 0.2708,
 7 |     "avg_loss": 4.0296,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/Beijing-Opera-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.3191,
 4 |     "f1_score": 0.189,
 5 |     "precision": 0.3222,
 6 |     "recall": 0.2955,
 7 |     "avg_loss": 3.6894,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-Actions-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6375,
 4 |     "f1_score": 0.5951,
 5 |     "precision": 0.7354,
 6 |     "recall": 0.6375,
 7 |     "avg_loss": 1.1233,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-Actions-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6125,
 4 |     "f1_score": 0.578,
 5 |     "precision": 0.6303,
 6 |     "recall": 0.6125,
 7 |     "avg_loss": 1.1387,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-Actions-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6375,
 4 |     "f1_score": 0.6191,
 5 |     "precision": 0.6751,
 6 |     "recall": 0.6375,
 7 |     "avg_loss": 1.0817,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.5487,
 4 |     "f1_score": 0.5236,
 5 |     "precision": 0.5844,
 6 |     "recall": 0.5538,
 7 |     "avg_loss": 1.6837,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD10.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.5078,
 4 |     "f1_score": 0.4733,
 5 |     "precision": 0.5256,
 6 |     "recall": 0.512,
 7 |     "avg_loss": 1.7276,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6047,
 4 |     "f1_score": 0.5699,
 5 |     "precision": 0.6039,
 6 |     "recall": 0.6031,
 7 |     "avg_loss": 1.4961,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD7.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.5274,
 4 |     "f1_score": 0.5195,
 5 |     "precision": 0.6315,
 6 |     "recall": 0.5583,
 7 |     "avg_loss": 1.7977,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD9.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.5417,
 4 |     "f1_score": 0.5105,
 5 |     "precision": 0.5439,
 6 |     "recall": 0.5643,
 7 |     "avg_loss": 1.6609,
 8 |     "epoch": -1
 9 |   }
10 | }


--------------------------------------------------------------------------------
/pengi/models/audio.py:
--------------------------------------------------------------------------------
1 | from .htsat import HTSATWrapper
2 | 
def get_audio_encoder(name: str):
    """Look up an audio encoder by name.

    Args:
        name: Identifier of the requested encoder. Only ``"HTSAT"`` is
            recognised.

    Returns:
        The tuple ``(HTSATWrapper, 768)`` when ``name == "HTSAT"``. The
        integer is presumably the encoder's output width -- confirm against
        the caller.

    Raises:
        ValueError: If ``name`` is not a supported encoder. ``ValueError``
            subclasses ``Exception``, so callers that catch ``Exception``
            keep working.
    """
    if name == "HTSAT":
        return HTSATWrapper, 768
    # An unknown name is a bad argument value, so signal it with ValueError
    # rather than a bare Exception; the message text is unchanged.
    raise ValueError('The audio encoder name {} is incorrect or not supported'.format(name))


--------------------------------------------------------------------------------
/pengi/__init__.py:
--------------------------------------------------------------------------------
 1 | from .models.audio import get_audio_encoder
 2 | from .models.pengi import Projection
 3 | 
 4 | from .wrapper import PengiWrapper as Pengi
 5 | 
 6 | pengi = Pengi(config="base") 
 7 | pengi.args.classes_num = None
 8 | pengi.args.use_precomputed_melspec = False
 9 | pengi.args.pretrained_audioencoder_path = None
10 | 
11 | 
12 | process_audio_fn = pengi.preprocess_audio
13 | 


--------------------------------------------------------------------------------
/scripts/run_all_datasets_coop.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | bash scripts/beijing_opera.sh coop
 3 | bash scripts/crema_d.sh coop
 4 | bash scripts/esc50_actions.sh coop
 5 | bash scripts/esc50.sh coop
 6 | bash scripts/gt_music_genre.sh coop
 7 | bash scripts/ns_instruments.sh coop
 8 | bash scripts/ravdess.sh coop
 9 | bash scripts/sesa.sh coop
10 | bash scripts/tut.sh coop
11 | bash scripts/urban_sound.sh coop
12 | bash scripts/vocal_sound.sh coop


--------------------------------------------------------------------------------
/scripts/run_all_datasets_palm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | bash scripts/beijing_opera.sh palm
 3 | bash scripts/crema_d.sh palm
 4 | bash scripts/esc50_actions.sh palm
 5 | bash scripts/esc50.sh palm
 6 | bash scripts/gt_music_genre.sh palm
 7 | bash scripts/ns_instruments.sh palm
 8 | bash scripts/ravdess.sh palm
 9 | bash scripts/sesa.sh palm
10 | bash scripts/tut.sh palm
11 | bash scripts/urban_sound.sh palm
12 | bash scripts/vocal_sound.sh palm


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # PyTorch
 2 | --extra-index-url https://download.pytorch.org/whl/cu113
 3 | torch==1.11.0+cu113 
 4 | torchvision==0.12.0+cu113 
 5 | torchaudio==0.11.0 
 6 | 
 7 | # Others
 8 | numpy==1.23.0
 9 | pandas==2.0.3
10 | matplotlib==3.6.3
11 | scikit-learn==1.2.0
12 | notebook==6.5.6
13 | tabulate==0.9.0
14 | 
15 | torchlibrosa==0.1.0
16 | transformers==4.28.1
17 | PyYAML==6.0
18 | importlib_resources==5.12.0
19 | librosa==0.10.0.post2
20 | 


--------------------------------------------------------------------------------
/scripts/run_all_datasets_cocoop.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | bash scripts/beijing_opera.sh cocoop
 3 | bash scripts/crema_d.sh cocoop
 4 | bash scripts/esc50_actions.sh cocoop
 5 | bash scripts/esc50.sh cocoop
 6 | bash scripts/gt_music_genre.sh cocoop
 7 | bash scripts/ns_instruments.sh cocoop
 8 | bash scripts/ravdess.sh cocoop
 9 | bash scripts/sesa.sh cocoop
10 | bash scripts/tut.sh cocoop
11 | bash scripts/urban_sound.sh cocoop
12 | bash scripts/vocal_sound.sh cocoop


--------------------------------------------------------------------------------
/scripts/run_all_datasets_zeroshot.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | bash scripts/beijing_opera.sh zeroshot
 3 | bash scripts/crema_d.sh zeroshot
 4 | bash scripts/esc50_actions.sh zeroshot
 5 | bash scripts/esc50.sh zeroshot
 6 | bash scripts/gt_music_genre.sh zeroshot
 7 | bash scripts/ns_instruments.sh zeroshot
 8 | bash scripts/ravdess.sh zeroshot
 9 | bash scripts/sesa.sh zeroshot
10 | bash scripts/tut.sh zeroshot
11 | bash scripts/urban_sound.sh zeroshot
12 | bash scripts/vocal_sound.sh zeroshot


--------------------------------------------------------------------------------
/logs/zeroshot/accuracy.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "Beijing-Opera": [
 3 |     0.2881
 4 |   ],
 5 |   "CREMA-D": [
 6 |     0.231
 7 |   ],
 8 |   "ESC50-Actions": [
 9 |     0.6525
10 |   ],
11 |   "ESC50": [
12 |     0.4965
13 |   ],
14 |   "GT-Music-Genre": [
15 |     0.325
16 |   ],
17 |   "NS-Instruments": [
18 |     0.3291
19 |   ],
20 |   "RAVDESS": [
21 |     0.1222
22 |   ],
23 |   "SESA": [
24 |     0.7238
25 |   ],
26 |   "TUT2017": [
27 |     0.2435
28 |   ],
29 |   "UrbanSound8K": [
30 |     0.5349
31 |   ],
32 |   "VocalSound": [
33 |     0.4197
34 |   ]
35 | }


--------------------------------------------------------------------------------
/logs/zeroshot/f1_score.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "Beijing-Opera": [
 3 |     0.1321
 4 |   ],
 5 |   "CREMA-D": [
 6 |     0.172
 7 |   ],
 8 |   "ESC50-Actions": [
 9 |     0.6138
10 |   ],
11 |   "ESC50": [
12 |     0.4478
13 |   ],
14 |   "GT-Music-Genre": [
15 |     0.2807
16 |   ],
17 |   "NS-Instruments": [
18 |     0.26
19 |   ],
20 |   "RAVDESS": [
21 |     0.0771
22 |   ],
23 |   "SESA": [
24 |     0.6827
25 |   ],
26 |   "TUT2017": [
27 |     0.1795
28 |   ],
29 |   "UrbanSound8K": [
30 |     0.5144
31 |   ],
32 |   "VocalSound": [
33 |     0.3834
34 |   ]
35 | }


--------------------------------------------------------------------------------
/logs/palm/ESC50-Actions-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 1.0,
 4 |     "f1_score": 1.0,
 5 |     "precision": 1.0,
 6 |     "recall": 1.0,
 7 |     "avg_loss": 0.0033,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9875,
12 |     "f1_score": 0.9875,
13 |     "precision": 0.9889,
14 |     "recall": 0.9875,
15 |     "avg_loss": 0.0224,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 1.0,
20 |     "f1_score": 1.0,
21 |     "precision": 1.0,
22 |     "recall": 1.0,
23 |     "avg_loss": 0.0,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/Beijing-Opera-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 1.0,
 4 |     "f1_score": 1.0,
 5 |     "precision": 1.0,
 6 |     "recall": 1.0,
 7 |     "avg_loss": 0.0379,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9787,
12 |     "f1_score": 0.9799,
13 |     "precision": 0.9821,
14 |     "recall": 0.9792,
15 |     "avg_loss": 0.0701,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 1.0,
20 |     "f1_score": 1.0,
21 |     "precision": 1.0,
22 |     "recall": 1.0,
23 |     "avg_loss": 0.0225,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/ESC50.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.956,
 4 |     "f1_score": 0.9544,
 5 |     "precision": 0.9633,
 6 |     "recall": 0.956,
 7 |     "avg_loss": 0.299,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.96,
12 |     "f1_score": 0.9591,
13 |     "precision": 0.9649,
14 |     "recall": 0.96,
15 |     "avg_loss": 0.2285,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.962,
20 |     "f1_score": 0.9614,
21 |     "precision": 0.9659,
22 |     "recall": 0.962,
23 |     "avg_loss": 0.2537,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/ESC50.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.946,
 4 |     "f1_score": 0.9448,
 5 |     "precision": 0.9508,
 6 |     "recall": 0.946,
 7 |     "avg_loss": 0.1992,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.937,
12 |     "f1_score": 0.9351,
13 |     "precision": 0.9451,
14 |     "recall": 0.937,
15 |     "avg_loss": 0.2249,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.945,
20 |     "f1_score": 0.9431,
21 |     "precision": 0.9534,
22 |     "recall": 0.945,
23 |     "avg_loss": 0.2002,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/ESC50-Actions-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.975,
 4 |     "f1_score": 0.9739,
 5 |     "precision": 0.9778,
 6 |     "recall": 0.975,
 7 |     "avg_loss": 0.0883,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.95,
12 |     "f1_score": 0.9496,
13 |     "precision": 0.955,
14 |     "recall": 0.95,
15 |     "avg_loss": 0.1048,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 1.0,
20 |     "f1_score": 1.0,
21 |     "precision": 1.0,
22 |     "recall": 1.0,
23 |     "avg_loss": 0.0615,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/ESC50-Actions-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.975,
 4 |     "f1_score": 0.9739,
 5 |     "precision": 0.9778,
 6 |     "recall": 0.975,
 7 |     "avg_loss": 0.1317,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.975,
12 |     "f1_score": 0.9749,
13 |     "precision": 0.9778,
14 |     "recall": 0.975,
15 |     "avg_loss": 0.0766,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 1.0,
20 |     "f1_score": 1.0,
21 |     "precision": 1.0,
22 |     "recall": 1.0,
23 |     "avg_loss": 0.0551,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/ESC50-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9525,
 4 |     "f1_score": 0.9516,
 5 |     "precision": 0.9557,
 6 |     "recall": 0.9525,
 7 |     "avg_loss": 0.1851,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.95,
12 |     "f1_score": 0.9488,
13 |     "precision": 0.9553,
14 |     "recall": 0.95,
15 |     "avg_loss": 0.1796,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.945,
20 |     "f1_score": 0.945,
21 |     "precision": 0.9512,
22 |     "recall": 0.945,
23 |     "avg_loss": 0.223,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/ESC50.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.941,
 4 |     "f1_score": 0.9399,
 5 |     "precision": 0.9497,
 6 |     "recall": 0.941,
 7 |     "avg_loss": 0.2059,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.939,
12 |     "f1_score": 0.9369,
13 |     "precision": 0.9471,
14 |     "recall": 0.939,
15 |     "avg_loss": 0.2297,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9345,
20 |     "f1_score": 0.9335,
21 |     "precision": 0.9437,
22 |     "recall": 0.9345,
23 |     "avg_loss": 0.2171,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/SESA.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9143,
 4 |     "f1_score": 0.9044,
 5 |     "precision": 0.8993,
 6 |     "recall": 0.9143,
 7 |     "avg_loss": 0.2474,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8952,
12 |     "f1_score": 0.8959,
13 |     "precision": 0.9002,
14 |     "recall": 0.9008,
15 |     "avg_loss": 0.3017,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8762,
20 |     "f1_score": 0.8639,
21 |     "precision": 0.865,
22 |     "recall": 0.8696,
23 |     "avg_loss": 0.342,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/ESC50-Actions.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.97,
 4 |     "f1_score": 0.9698,
 5 |     "precision": 0.9757,
 6 |     "recall": 0.97,
 7 |     "avg_loss": 0.2604,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9625,
12 |     "f1_score": 0.962,
13 |     "precision": 0.9677,
14 |     "recall": 0.9625,
15 |     "avg_loss": 0.2,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.965,
20 |     "f1_score": 0.9644,
21 |     "precision": 0.9689,
22 |     "recall": 0.965,
23 |     "avg_loss": 0.2197,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/ESC50-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.965,
 4 |     "f1_score": 0.9633,
 5 |     "precision": 0.9685,
 6 |     "recall": 0.965,
 7 |     "avg_loss": 0.2119,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.965,
12 |     "f1_score": 0.9637,
13 |     "precision": 0.9675,
14 |     "recall": 0.965,
15 |     "avg_loss": 0.2543,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.96,
20 |     "f1_score": 0.9588,
21 |     "precision": 0.9634,
22 |     "recall": 0.96,
23 |     "avg_loss": 0.2516,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/SESA.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8857,
 4 |     "f1_score": 0.8955,
 5 |     "precision": 0.9062,
 6 |     "recall": 0.8907,
 7 |     "avg_loss": 0.5661,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9143,
12 |     "f1_score": 0.9197,
13 |     "precision": 0.9273,
14 |     "recall": 0.9213,
15 |     "avg_loss": 0.4241,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8857,
20 |     "f1_score": 0.8805,
21 |     "precision": 0.8909,
22 |     "recall": 0.894,
23 |     "avg_loss": 2.0808,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/Beijing-Opera-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9375,
 4 |     "f1_score": 0.9348,
 5 |     "precision": 0.9423,
 6 |     "recall": 0.9423,
 7 |     "avg_loss": 0.1414,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 1.0,
12 |     "f1_score": 1.0,
13 |     "precision": 1.0,
14 |     "recall": 1.0,
15 |     "avg_loss": 0.0241,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9792,
20 |     "f1_score": 0.9776,
21 |     "precision": 0.9821,
22 |     "recall": 0.975,
23 |     "avg_loss": 0.0527,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/Beijing-Opera-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9787,
 4 |     "f1_score": 0.9768,
 5 |     "precision": 0.9808,
 6 |     "recall": 0.975,
 7 |     "avg_loss": 0.0794,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9787,
12 |     "f1_score": 0.9772,
13 |     "precision": 0.9773,
14 |     "recall": 0.9792,
15 |     "avg_loss": 0.0862,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 1.0,
20 |     "f1_score": 1.0,
21 |     "precision": 1.0,
22 |     "recall": 1.0,
23 |     "avg_loss": 0.0596,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/Beijing-Opera-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9149,
 4 |     "f1_score": 0.9099,
 5 |     "precision": 0.925,
 6 |     "recall": 0.9126,
 7 |     "avg_loss": 0.2799,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9787,
12 |     "f1_score": 0.9776,
13 |     "precision": 0.9821,
14 |     "recall": 0.975,
15 |     "avg_loss": 0.0502,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 1.0,
20 |     "f1_score": 1.0,
21 |     "precision": 1.0,
22 |     "recall": 1.0,
23 |     "avg_loss": 0.0481,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/ESC50-Actions-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9875,
 4 |     "f1_score": 0.9875,
 5 |     "precision": 0.9889,
 6 |     "recall": 0.9875,
 7 |     "avg_loss": 0.0394,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.975,
12 |     "f1_score": 0.9749,
13 |     "precision": 0.9778,
14 |     "recall": 0.975,
15 |     "avg_loss": 0.0585,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 1.0,
20 |     "f1_score": 1.0,
21 |     "precision": 1.0,
22 |     "recall": 1.0,
23 |     "avg_loss": 0.0358,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/ESC50-Actions.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.97,
 4 |     "f1_score": 0.9703,
 5 |     "precision": 0.9758,
 6 |     "recall": 0.97,
 7 |     "avg_loss": 0.0762,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9625,
12 |     "f1_score": 0.9621,
13 |     "precision": 0.9681,
14 |     "recall": 0.9625,
15 |     "avg_loss": 0.085,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.965,
20 |     "f1_score": 0.9648,
21 |     "precision": 0.971,
22 |     "recall": 0.965,
23 |     "avg_loss": 0.1053,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/ESC50-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9575,
 4 |     "f1_score": 0.9572,
 5 |     "precision": 0.9623,
 6 |     "recall": 0.9575,
 7 |     "avg_loss": 0.1299,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.93,
12 |     "f1_score": 0.9273,
13 |     "precision": 0.9398,
14 |     "recall": 0.93,
15 |     "avg_loss": 0.3048,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.965,
20 |     "f1_score": 0.965,
21 |     "precision": 0.9685,
22 |     "recall": 0.965,
23 |     "avg_loss": 0.1443,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/ESC50-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.92,
 4 |     "f1_score": 0.9177,
 5 |     "precision": 0.9295,
 6 |     "recall": 0.92,
 7 |     "avg_loss": 0.2466,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.915,
12 |     "f1_score": 0.9111,
13 |     "precision": 0.9263,
14 |     "recall": 0.915,
15 |     "avg_loss": 0.2212,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9325,
20 |     "f1_score": 0.9271,
21 |     "precision": 0.9431,
22 |     "recall": 0.9325,
23 |     "avg_loss": 0.2297,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/ESC50-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.965,
 4 |     "f1_score": 0.9644,
 5 |     "precision": 0.9662,
 6 |     "recall": 0.965,
 7 |     "avg_loss": 0.129,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9575,
12 |     "f1_score": 0.9568,
13 |     "precision": 0.963,
14 |     "recall": 0.9575,
15 |     "avg_loss": 0.1317,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.945,
20 |     "f1_score": 0.9452,
21 |     "precision": 0.9569,
22 |     "recall": 0.945,
23 |     "avg_loss": 0.1732,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/ESC50-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.935,
 4 |     "f1_score": 0.9335,
 5 |     "precision": 0.9398,
 6 |     "recall": 0.935,
 7 |     "avg_loss": 0.3011,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9375,
12 |     "f1_score": 0.9368,
13 |     "precision": 0.9464,
14 |     "recall": 0.9375,
15 |     "avg_loss": 0.2632,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.93,
20 |     "f1_score": 0.9269,
21 |     "precision": 0.94,
22 |     "recall": 0.93,
23 |     "avg_loss": 0.2595,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/SESA.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8381,
 4 |     "f1_score": 0.8425,
 5 |     "precision": 0.8542,
 6 |     "recall": 0.848,
 7 |     "avg_loss": 0.3378,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8762,
12 |     "f1_score": 0.8821,
13 |     "precision": 0.8908,
14 |     "recall": 0.8788,
15 |     "avg_loss": 0.2775,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8952,
20 |     "f1_score": 0.8847,
21 |     "precision": 0.8851,
22 |     "recall": 0.8949,
23 |     "avg_loss": 0.3547,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/Beijing-Opera-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 1.0,
 4 |     "f1_score": 1.0,
 5 |     "precision": 1.0,
 6 |     "recall": 1.0,
 7 |     "avg_loss": 0.0937,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9787,
12 |     "f1_score": 0.9772,
13 |     "precision": 0.9773,
14 |     "recall": 0.9792,
15 |     "avg_loss": 0.1427,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9574,
20 |     "f1_score": 0.9545,
21 |     "precision": 0.9583,
22 |     "recall": 0.9583,
23 |     "avg_loss": 0.1051,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/CREMA-D.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.313,
 4 |     "f1_score": 0.2609,
 5 |     "precision": 0.2909,
 6 |     "recall": 0.3439,
 7 |     "avg_loss": 1.7938,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.4197,
12 |     "f1_score": 0.2745,
13 |     "precision": 0.2817,
14 |     "recall": 0.3173,
15 |     "avg_loss": 1.5727,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.276,
20 |     "f1_score": 0.2389,
21 |     "precision": 0.2947,
22 |     "recall": 0.3498,
23 |     "avg_loss": 1.5965,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/ESC50-Actions.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9625,
 4 |     "f1_score": 0.9618,
 5 |     "precision": 0.9667,
 6 |     "recall": 0.9625,
 7 |     "avg_loss": 0.1252,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.94,
12 |     "f1_score": 0.939,
13 |     "precision": 0.9465,
14 |     "recall": 0.94,
15 |     "avg_loss": 0.1606,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.955,
20 |     "f1_score": 0.9532,
21 |     "precision": 0.9635,
22 |     "recall": 0.955,
23 |     "avg_loss": 0.1533,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/ESC50-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9225,
 4 |     "f1_score": 0.9209,
 5 |     "precision": 0.94,
 6 |     "recall": 0.9225,
 7 |     "avg_loss": 0.2018,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9425,
12 |     "f1_score": 0.9405,
13 |     "precision": 0.9487,
14 |     "recall": 0.9425,
15 |     "avg_loss": 0.1728,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.92,
20 |     "f1_score": 0.9174,
21 |     "precision": 0.9336,
22 |     "recall": 0.92,
23 |     "avg_loss": 0.1842,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/ESC50-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9525,
 4 |     "f1_score": 0.9521,
 5 |     "precision": 0.9594,
 6 |     "recall": 0.9525,
 7 |     "avg_loss": 0.1646,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.95,
12 |     "f1_score": 0.9487,
13 |     "precision": 0.9573,
14 |     "recall": 0.95,
15 |     "avg_loss": 0.222,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9475,
20 |     "f1_score": 0.946,
21 |     "precision": 0.956,
22 |     "recall": 0.9475,
23 |     "avg_loss": 0.1604,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/ESC50-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.945,
 4 |     "f1_score": 0.9432,
 5 |     "precision": 0.9529,
 6 |     "recall": 0.945,
 7 |     "avg_loss": 0.1947,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9225,
12 |     "f1_score": 0.9172,
13 |     "precision": 0.9352,
14 |     "recall": 0.9225,
15 |     "avg_loss": 0.2257,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9325,
20 |     "f1_score": 0.9313,
21 |     "precision": 0.9392,
22 |     "recall": 0.9325,
23 |     "avg_loss": 0.205,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/ESC50-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9325,
 4 |     "f1_score": 0.9316,
 5 |     "precision": 0.9403,
 6 |     "recall": 0.9325,
 7 |     "avg_loss": 0.2835,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.93,
12 |     "f1_score": 0.9293,
13 |     "precision": 0.9389,
14 |     "recall": 0.93,
15 |     "avg_loss": 0.3484,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9275,
20 |     "f1_score": 0.9278,
21 |     "precision": 0.9386,
22 |     "recall": 0.9275,
23 |     "avg_loss": 0.3128,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/RAVDESS.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.3849,
 4 |     "f1_score": 0.3668,
 5 |     "precision": 0.4042,
 6 |     "recall": 0.4284,
 7 |     "avg_loss": 1.6312,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.2688,
12 |     "f1_score": 0.2385,
13 |     "precision": 0.2657,
14 |     "recall": 0.2744,
15 |     "avg_loss": 1.9451,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.3422,
20 |     "f1_score": 0.3075,
21 |     "precision": 0.3023,
22 |     "recall": 0.3566,
23 |     "avg_loss": 1.6283,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/TUT2017.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6391,
 4 |     "f1_score": 0.6315,
 5 |     "precision": 0.6524,
 6 |     "recall": 0.6391,
 7 |     "avg_loss": 1.0158,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.6667,
12 |     "f1_score": 0.6596,
13 |     "precision": 0.6774,
14 |     "recall": 0.6667,
15 |     "avg_loss": 0.9857,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.6525,
20 |     "f1_score": 0.6409,
21 |     "precision": 0.6573,
22 |     "recall": 0.6525,
23 |     "avg_loss": 1.0111,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/VocalSound.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7162,
 4 |     "f1_score": 0.6953,
 5 |     "precision": 0.7226,
 6 |     "recall": 0.7163,
 7 |     "avg_loss": 0.8699,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7485,
12 |     "f1_score": 0.7481,
13 |     "precision": 0.7556,
14 |     "recall": 0.7486,
15 |     "avg_loss": 0.788,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.6642,
20 |     "f1_score": 0.6606,
21 |     "precision": 0.681,
22 |     "recall": 0.6642,
23 |     "avg_loss": 0.978,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/Beijing-Opera-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9375,
 4 |     "f1_score": 0.9348,
 5 |     "precision": 0.9423,
 6 |     "recall": 0.9423,
 7 |     "avg_loss": 1.5655,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9583,
12 |     "f1_score": 0.9594,
13 |     "precision": 0.9667,
14 |     "recall": 0.9583,
15 |     "avg_loss": 0.1225,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 1.0,
20 |     "f1_score": 1.0,
21 |     "precision": 1.0,
22 |     "recall": 1.0,
23 |     "avg_loss": 0.0007,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/Beijing-Opera-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 1.0,
 4 |     "f1_score": 1.0,
 5 |     "precision": 1.0,
 6 |     "recall": 1.0,
 7 |     "avg_loss": 0.0116,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9149,
12 |     "f1_score": 0.9132,
13 |     "precision": 0.9201,
14 |     "recall": 0.9173,
15 |     "avg_loss": 0.6439,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9574,
20 |     "f1_score": 0.9599,
21 |     "precision": 0.9643,
22 |     "recall": 0.9615,
23 |     "avg_loss": 0.1688,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/Beijing-Opera-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9787,
 4 |     "f1_score": 0.9772,
 5 |     "precision": 0.9773,
 6 |     "recall": 0.9792,
 7 |     "avg_loss": 0.1265,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9362,
12 |     "f1_score": 0.9331,
13 |     "precision": 0.9344,
14 |     "recall": 0.9333,
15 |     "avg_loss": 0.1467,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 1.0,
20 |     "f1_score": 1.0,
21 |     "precision": 1.0,
22 |     "recall": 1.0,
23 |     "avg_loss": 0.0,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/Beijing-Opera-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 1.0,
 4 |     "f1_score": 1.0,
 5 |     "precision": 1.0,
 6 |     "recall": 1.0,
 7 |     "avg_loss": 0.0015,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8936,
12 |     "f1_score": 0.8882,
13 |     "precision": 0.9107,
14 |     "recall": 0.8958,
15 |     "avg_loss": 0.411,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9787,
20 |     "f1_score": 0.9772,
21 |     "precision": 0.9773,
22 |     "recall": 0.9792,
23 |     "avg_loss": 0.2133,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/CREMA-D.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.4453,
 4 |     "f1_score": 0.3277,
 5 |     "precision": 0.334,
 6 |     "recall": 0.3604,
 7 |     "avg_loss": 2.9509,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.358,
12 |     "f1_score": 0.3083,
13 |     "precision": 0.3326,
14 |     "recall": 0.3725,
15 |     "avg_loss": 3.1415,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.2344,
20 |     "f1_score": 0.199,
21 |     "precision": 0.2792,
22 |     "recall": 0.2948,
23 |     "avg_loss": 2.7708,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/ESC50-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.965,
 4 |     "f1_score": 0.9648,
 5 |     "precision": 0.9692,
 6 |     "recall": 0.965,
 7 |     "avg_loss": 0.2024,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.97,
12 |     "f1_score": 0.9697,
13 |     "precision": 0.9729,
14 |     "recall": 0.97,
15 |     "avg_loss": 0.1548,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9725,
20 |     "f1_score": 0.9722,
21 |     "precision": 0.9757,
22 |     "recall": 0.9725,
23 |     "avg_loss": 0.1136,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/ESC50-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.95,
 4 |     "f1_score": 0.9497,
 5 |     "precision": 0.9625,
 6 |     "recall": 0.95,
 7 |     "avg_loss": 0.3991,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.955,
12 |     "f1_score": 0.9544,
13 |     "precision": 0.9594,
14 |     "recall": 0.955,
15 |     "avg_loss": 0.2121,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9475,
20 |     "f1_score": 0.9481,
21 |     "precision": 0.9528,
22 |     "recall": 0.9475,
23 |     "avg_loss": 0.3287,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/ESC50-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9675,
 4 |     "f1_score": 0.967,
 5 |     "precision": 0.9729,
 6 |     "recall": 0.9675,
 7 |     "avg_loss": 0.1743,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.975,
12 |     "f1_score": 0.9742,
13 |     "precision": 0.9782,
14 |     "recall": 0.975,
15 |     "avg_loss": 0.1198,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9675,
20 |     "f1_score": 0.9667,
21 |     "precision": 0.9701,
22 |     "recall": 0.9675,
23 |     "avg_loss": 0.3094,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/ESC50-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9325,
 4 |     "f1_score": 0.9274,
 5 |     "precision": 0.9435,
 6 |     "recall": 0.9325,
 7 |     "avg_loss": 0.5073,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.935,
12 |     "f1_score": 0.9333,
13 |     "precision": 0.9463,
14 |     "recall": 0.935,
15 |     "avg_loss": 0.4013,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9625,
20 |     "f1_score": 0.9614,
21 |     "precision": 0.9673,
22 |     "recall": 0.9625,
23 |     "avg_loss": 0.265,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/RAVDESS.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.4562,
 4 |     "f1_score": 0.4486,
 5 |     "precision": 0.4651,
 6 |     "recall": 0.4848,
 7 |     "avg_loss": 2.4994,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.4603,
12 |     "f1_score": 0.4718,
13 |     "precision": 0.4628,
14 |     "recall": 0.4905,
15 |     "avg_loss": 2.6958,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.4623,
20 |     "f1_score": 0.4435,
21 |     "precision": 0.4707,
22 |     "recall": 0.4439,
23 |     "avg_loss": 2.505,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/TUT2017.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7959,
 4 |     "f1_score": 0.7942,
 5 |     "precision": 0.8073,
 6 |     "recall": 0.7959,
 7 |     "avg_loss": 0.9918,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8047,
12 |     "f1_score": 0.8024,
13 |     "precision": 0.809,
14 |     "recall": 0.8047,
15 |     "avg_loss": 0.9609,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7729,
20 |     "f1_score": 0.772,
21 |     "precision": 0.7846,
22 |     "recall": 0.7729,
23 |     "avg_loss": 1.0951,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/VocalSound.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8101,
 4 |     "f1_score": 0.8107,
 5 |     "precision": 0.8209,
 6 |     "recall": 0.81,
 7 |     "avg_loss": 1.5045,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8168,
12 |     "f1_score": 0.8179,
13 |     "precision": 0.8215,
14 |     "recall": 0.8168,
15 |     "avg_loss": 1.3647,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7964,
20 |     "f1_score": 0.7972,
21 |     "precision": 0.8021,
22 |     "recall": 0.7964,
23 |     "avg_loss": 1.4756,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/Beijing-Opera.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9577,
 4 |     "f1_score": 0.9556,
 5 |     "precision": 0.9613,
 6 |     "recall": 0.958,
 7 |     "avg_loss": 0.1251,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.983,
12 |     "f1_score": 0.9824,
13 |     "precision": 0.9838,
14 |     "recall": 0.9825,
15 |     "avg_loss": 0.054,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9916,
20 |     "f1_score": 0.991,
21 |     "precision": 0.9919,
22 |     "recall": 0.9908,
23 |     "avg_loss": 0.0446,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/CREMA-D.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.2539,
 4 |     "f1_score": 0.2267,
 5 |     "precision": 0.2911,
 6 |     "recall": 0.3371,
 7 |     "avg_loss": 1.7138,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.3358,
12 |     "f1_score": 0.2681,
13 |     "precision": 0.3493,
14 |     "recall": 0.3263,
15 |     "avg_loss": 1.6171,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.3156,
20 |     "f1_score": 0.2851,
21 |     "precision": 0.3206,
22 |     "recall": 0.3717,
23 |     "avg_loss": 1.5863,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/ESC50-Actions-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9625,
 4 |     "f1_score": 0.9633,
 5 |     "precision": 0.9727,
 6 |     "recall": 0.9625,
 7 |     "avg_loss": 0.1345,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.925,
12 |     "f1_score": 0.9235,
13 |     "precision": 0.938,
14 |     "recall": 0.925,
15 |     "avg_loss": 0.1394,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.95,
20 |     "f1_score": 0.9495,
21 |     "precision": 0.9578,
22 |     "recall": 0.95,
23 |     "avg_loss": 0.138,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/ESC50-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9525,
 4 |     "f1_score": 0.9513,
 5 |     "precision": 0.9563,
 6 |     "recall": 0.9525,
 7 |     "avg_loss": 0.1892,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.945,
12 |     "f1_score": 0.9437,
13 |     "precision": 0.95,
14 |     "recall": 0.945,
15 |     "avg_loss": 0.2038,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9525,
20 |     "f1_score": 0.9511,
21 |     "precision": 0.9586,
22 |     "recall": 0.9525,
23 |     "avg_loss": 0.1943,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/GT-Music-Genre.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.75,
 4 |     "f1_score": 0.7383,
 5 |     "precision": 0.7553,
 6 |     "recall": 0.7659,
 7 |     "avg_loss": 0.7872,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.745,
12 |     "f1_score": 0.7417,
13 |     "precision": 0.7539,
14 |     "recall": 0.7565,
15 |     "avg_loss": 0.8638,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.76,
20 |     "f1_score": 0.7623,
21 |     "precision": 0.7691,
22 |     "recall": 0.7676,
23 |     "avg_loss": 0.8198,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/RAVDESS.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.3727,
 4 |     "f1_score": 0.3634,
 5 |     "precision": 0.4068,
 6 |     "recall": 0.4308,
 7 |     "avg_loss": 1.7129,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.4399,
12 |     "f1_score": 0.4351,
13 |     "precision": 0.4518,
14 |     "recall": 0.4742,
15 |     "avg_loss": 1.5401,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.3523,
20 |     "f1_score": 0.3436,
21 |     "precision": 0.3837,
22 |     "recall": 0.3857,
23 |     "avg_loss": 1.6625,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/TUT2017-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.653,
 4 |     "f1_score": 0.6472,
 5 |     "precision": 0.6664,
 6 |     "recall": 0.653,
 7 |     "avg_loss": 1.0301,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.6692,
12 |     "f1_score": 0.6707,
13 |     "precision": 0.6866,
14 |     "recall": 0.6692,
15 |     "avg_loss": 1.0415,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.6462,
20 |     "f1_score": 0.6388,
21 |     "precision": 0.6532,
22 |     "recall": 0.6462,
23 |     "avg_loss": 0.9797,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/TUT2017-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7938,
 4 |     "f1_score": 0.7925,
 5 |     "precision": 0.81,
 6 |     "recall": 0.7939,
 7 |     "avg_loss": 0.6155,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7405,
12 |     "f1_score": 0.74,
13 |     "precision": 0.7625,
14 |     "recall": 0.7406,
15 |     "avg_loss": 0.7379,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7856,
20 |     "f1_score": 0.7813,
21 |     "precision": 0.8007,
22 |     "recall": 0.7856,
23 |     "avg_loss": 0.6484,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/TUT2017-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7732,
 4 |     "f1_score": 0.772,
 5 |     "precision": 0.7881,
 6 |     "recall": 0.7732,
 7 |     "avg_loss": 0.6548,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7547,
12 |     "f1_score": 0.749,
13 |     "precision": 0.7656,
14 |     "recall": 0.7547,
15 |     "avg_loss": 0.7676,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7436,
20 |     "f1_score": 0.7355,
21 |     "precision": 0.7566,
22 |     "recall": 0.7436,
23 |     "avg_loss": 0.811,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/TUT2017.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7499,
 4 |     "f1_score": 0.7474,
 5 |     "precision": 0.7631,
 6 |     "recall": 0.7499,
 7 |     "avg_loss": 0.7294,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7215,
12 |     "f1_score": 0.7198,
13 |     "precision": 0.7426,
14 |     "recall": 0.7216,
15 |     "avg_loss": 0.8406,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7312,
20 |     "f1_score": 0.7246,
21 |     "precision": 0.7438,
22 |     "recall": 0.7313,
23 |     "avg_loss": 0.7826,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/UrbanSound8K.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7576,
 4 |     "f1_score": 0.7581,
 5 |     "precision": 0.7739,
 6 |     "recall": 0.774,
 7 |     "avg_loss": 0.8955,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7784,
12 |     "f1_score": 0.7741,
13 |     "precision": 0.7904,
14 |     "recall": 0.7913,
15 |     "avg_loss": 0.9063,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7597,
20 |     "f1_score": 0.7555,
21 |     "precision": 0.762,
22 |     "recall": 0.7712,
23 |     "avg_loss": 0.8523,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/VocalSound.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8081,
 4 |     "f1_score": 0.8054,
 5 |     "precision": 0.8116,
 6 |     "recall": 0.8082,
 7 |     "avg_loss": 0.6976,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7825,
12 |     "f1_score": 0.7759,
13 |     "precision": 0.7935,
14 |     "recall": 0.7826,
15 |     "avg_loss": 0.7931,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7463,
20 |     "f1_score": 0.7451,
21 |     "precision": 0.7516,
22 |     "recall": 0.7463,
23 |     "avg_loss": 0.7528,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/Beijing-Opera.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9323,
 4 |     "f1_score": 0.9309,
 5 |     "precision": 0.9429,
 6 |     "recall": 0.9341,
 7 |     "avg_loss": 0.1794,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.966,
12 |     "f1_score": 0.965,
13 |     "precision": 0.9661,
14 |     "recall": 0.9671,
15 |     "avg_loss": 0.1301,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9619,
20 |     "f1_score": 0.9605,
21 |     "precision": 0.9617,
22 |     "recall": 0.9633,
23 |     "avg_loss": 0.1255,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/ESC50-Actions-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.975,
 4 |     "f1_score": 0.9749,
 5 |     "precision": 0.9778,
 6 |     "recall": 0.975,
 7 |     "avg_loss": 0.0926,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.925,
12 |     "f1_score": 0.9255,
13 |     "precision": 0.9314,
14 |     "recall": 0.925,
15 |     "avg_loss": 0.182,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.925,
20 |     "f1_score": 0.9182,
21 |     "precision": 0.9504,
22 |     "recall": 0.925,
23 |     "avg_loss": 0.2313,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/ESC50-Actions-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.95,
 4 |     "f1_score": 0.9492,
 5 |     "precision": 0.9533,
 6 |     "recall": 0.95,
 7 |     "avg_loss": 0.1188,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.925,
12 |     "f1_score": 0.9213,
13 |     "precision": 0.9327,
14 |     "recall": 0.925,
15 |     "avg_loss": 0.2505,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9125,
20 |     "f1_score": 0.9119,
21 |     "precision": 0.9203,
22 |     "recall": 0.9125,
23 |     "avg_loss": 0.228,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/GT-Music-Genre.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.725,
 4 |     "f1_score": 0.719,
 5 |     "precision": 0.7102,
 6 |     "recall": 0.7397,
 7 |     "avg_loss": 0.7874,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.695,
12 |     "f1_score": 0.6937,
13 |     "precision": 0.7301,
14 |     "recall": 0.7066,
15 |     "avg_loss": 0.9331,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.735,
20 |     "f1_score": 0.7153,
21 |     "precision": 0.7331,
22 |     "recall": 0.7532,
23 |     "avg_loss": 0.8354,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/TUT2017-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.5829,
 4 |     "f1_score": 0.5675,
 5 |     "precision": 0.5914,
 6 |     "recall": 0.5829,
 7 |     "avg_loss": 1.1393,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.5991,
12 |     "f1_score": 0.5936,
13 |     "precision": 0.6136,
14 |     "recall": 0.5991,
15 |     "avg_loss": 1.1859,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.5709,
20 |     "f1_score": 0.5566,
21 |     "precision": 0.5736,
22 |     "recall": 0.5709,
23 |     "avg_loss": 1.2427,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/TUT2017-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.649,
 4 |     "f1_score": 0.6452,
 5 |     "precision": 0.6799,
 6 |     "recall": 0.6489,
 7 |     "avg_loss": 0.9844,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.6661,
12 |     "f1_score": 0.6516,
13 |     "precision": 0.6762,
14 |     "recall": 0.6661,
15 |     "avg_loss": 0.93,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.6607,
20 |     "f1_score": 0.6422,
21 |     "precision": 0.6616,
22 |     "recall": 0.6607,
23 |     "avg_loss": 0.9242,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/TUT2017-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6467,
 4 |     "f1_score": 0.6406,
 5 |     "precision": 0.6513,
 6 |     "recall": 0.6467,
 7 |     "avg_loss": 0.989,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7046,
12 |     "f1_score": 0.7018,
13 |     "precision": 0.7207,
14 |     "recall": 0.7046,
15 |     "avg_loss": 0.9182,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7012,
20 |     "f1_score": 0.6994,
21 |     "precision": 0.7085,
22 |     "recall": 0.7012,
23 |     "avg_loss": 0.8826,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/TUT2017-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6778,
 4 |     "f1_score": 0.6727,
 5 |     "precision": 0.6871,
 6 |     "recall": 0.6778,
 7 |     "avg_loss": 0.9506,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.6969,
12 |     "f1_score": 0.6914,
13 |     "precision": 0.6993,
14 |     "recall": 0.6969,
15 |     "avg_loss": 0.9087,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.6772,
20 |     "f1_score": 0.6656,
21 |     "precision": 0.6856,
22 |     "recall": 0.6772,
23 |     "avg_loss": 0.9949,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/UrbanSound8K.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.76,
 4 |     "f1_score": 0.7577,
 5 |     "precision": 0.7704,
 6 |     "recall": 0.7707,
 7 |     "avg_loss": 0.8501,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7378,
12 |     "f1_score": 0.7382,
13 |     "precision": 0.7493,
14 |     "recall": 0.7549,
15 |     "avg_loss": 0.8992,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7666,
20 |     "f1_score": 0.761,
21 |     "precision": 0.7694,
22 |     "recall": 0.7758,
23 |     "avg_loss": 0.8789,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/Beijing-Opera.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9747,
 4 |     "f1_score": 0.9738,
 5 |     "precision": 0.9773,
 6 |     "recall": 0.9752,
 7 |     "avg_loss": 0.3628,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9066,
12 |     "f1_score": 0.9046,
13 |     "precision": 0.92,
14 |     "recall": 0.9069,
15 |     "avg_loss": 0.4189,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9787,
20 |     "f1_score": 0.9786,
21 |     "precision": 0.9797,
22 |     "recall": 0.9793,
23 |     "avg_loss": 0.0881,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/ESC50-Actions-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.975,
 4 |     "f1_score": 0.9749,
 5 |     "precision": 0.9778,
 6 |     "recall": 0.975,
 7 |     "avg_loss": 0.3221,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.95,
12 |     "f1_score": 0.9489,
13 |     "precision": 0.9542,
14 |     "recall": 0.95,
15 |     "avg_loss": 0.4286,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9375,
20 |     "f1_score": 0.9364,
21 |     "precision": 0.9431,
22 |     "recall": 0.9375,
23 |     "avg_loss": 0.5072,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/ESC50-Actions-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.975,
 4 |     "f1_score": 0.975,
 5 |     "precision": 0.9764,
 6 |     "recall": 0.975,
 7 |     "avg_loss": 0.1443,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.975,
12 |     "f1_score": 0.9749,
13 |     "precision": 0.9778,
14 |     "recall": 0.975,
15 |     "avg_loss": 0.0247,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.95,
20 |     "f1_score": 0.9497,
21 |     "precision": 0.9546,
22 |     "recall": 0.95,
23 |     "avg_loss": 0.2049,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/GT-Music-Genre.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.79,
 4 |     "f1_score": 0.7914,
 5 |     "precision": 0.7952,
 6 |     "recall": 0.7964,
 7 |     "avg_loss": 1.6149,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.785,
12 |     "f1_score": 0.7888,
13 |     "precision": 0.8077,
14 |     "recall": 0.7902,
15 |     "avg_loss": 1.4901,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.825,
20 |     "f1_score": 0.8257,
21 |     "precision": 0.8287,
22 |     "recall": 0.8298,
23 |     "avg_loss": 1.7199,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/NS-Instruments.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6394,
 4 |     "f1_score": 0.6418,
 5 |     "precision": 0.641,
 6 |     "recall": 0.6854,
 7 |     "avg_loss": 3.9794,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.6108,
12 |     "f1_score": 0.6062,
13 |     "precision": 0.6166,
14 |     "recall": 0.6508,
15 |     "avg_loss": 4.4305,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.6648,
20 |     "f1_score": 0.6834,
21 |     "precision": 0.6709,
22 |     "recall": 0.7294,
23 |     "avg_loss": 2.6739,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/TUT2017-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6897,
 4 |     "f1_score": 0.6849,
 5 |     "precision": 0.6986,
 6 |     "recall": 0.6897,
 7 |     "avg_loss": 1.6695,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7239,
12 |     "f1_score": 0.7201,
13 |     "precision": 0.7299,
14 |     "recall": 0.7239,
15 |     "avg_loss": 1.3417,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7077,
20 |     "f1_score": 0.6943,
21 |     "precision": 0.6996,
22 |     "recall": 0.7077,
23 |     "avg_loss": 1.3855,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/TUT2017-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8067,
 4 |     "f1_score": 0.8073,
 5 |     "precision": 0.8207,
 6 |     "recall": 0.8066,
 7 |     "avg_loss": 0.8389,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8215,
12 |     "f1_score": 0.8194,
13 |     "precision": 0.825,
14 |     "recall": 0.8216,
15 |     "avg_loss": 0.8609,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8007,
20 |     "f1_score": 0.8013,
21 |     "precision": 0.8094,
22 |     "recall": 0.8007,
23 |     "avg_loss": 0.9164,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/TUT2017-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8586,
 4 |     "f1_score": 0.8578,
 5 |     "precision": 0.8597,
 6 |     "recall": 0.8586,
 7 |     "avg_loss": 0.5988,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8383,
12 |     "f1_score": 0.8369,
13 |     "precision": 0.8433,
14 |     "recall": 0.8383,
15 |     "avg_loss": 0.8137,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7725,
20 |     "f1_score": 0.7826,
21 |     "precision": 0.8053,
22 |     "recall": 0.7725,
23 |     "avg_loss": 1.0751,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/TUT2017-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8288,
 4 |     "f1_score": 0.8268,
 5 |     "precision": 0.85,
 6 |     "recall": 0.8288,
 7 |     "avg_loss": 0.8601,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.835,
12 |     "f1_score": 0.8331,
13 |     "precision": 0.8377,
14 |     "recall": 0.835,
15 |     "avg_loss": 0.8274,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8105,
20 |     "f1_score": 0.8096,
21 |     "precision": 0.8242,
22 |     "recall": 0.8105,
23 |     "avg_loss": 1.0036,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/UrbanSound8K.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.812,
 4 |     "f1_score": 0.8158,
 5 |     "precision": 0.8271,
 6 |     "recall": 0.8243,
 7 |     "avg_loss": 1.7177,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8037,
12 |     "f1_score": 0.8071,
13 |     "precision": 0.8151,
14 |     "recall": 0.817,
15 |     "avg_loss": 1.8778,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8074,
20 |     "f1_score": 0.8101,
21 |     "precision": 0.8198,
22 |     "recall": 0.8185,
23 |     "avg_loss": 1.7769,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/ESC50-Actions-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9375,
 4 |     "f1_score": 0.9382,
 5 |     "precision": 0.9505,
 6 |     "recall": 0.9375,
 7 |     "avg_loss": 0.1149,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.975,
12 |     "f1_score": 0.9749,
13 |     "precision": 0.9778,
14 |     "recall": 0.975,
15 |     "avg_loss": 0.062,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9625,
20 |     "f1_score": 0.9621,
21 |     "precision": 0.9689,
22 |     "recall": 0.9625,
23 |     "avg_loss": 0.1324,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/ESC50-Actions-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9875,
 4 |     "f1_score": 0.9875,
 5 |     "precision": 0.9889,
 6 |     "recall": 0.9875,
 7 |     "avg_loss": 0.0455,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9875,
12 |     "f1_score": 0.9875,
13 |     "precision": 0.9889,
14 |     "recall": 0.9875,
15 |     "avg_loss": 0.0669,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.95,
20 |     "f1_score": 0.9498,
21 |     "precision": 0.9616,
22 |     "recall": 0.95,
23 |     "avg_loss": 0.1235,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/ESC50-Actions-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.975,
 4 |     "f1_score": 0.9749,
 5 |     "precision": 0.9778,
 6 |     "recall": 0.975,
 7 |     "avg_loss": 0.0466,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.95,
12 |     "f1_score": 0.9495,
13 |     "precision": 0.9578,
14 |     "recall": 0.95,
15 |     "avg_loss": 0.0982,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9625,
20 |     "f1_score": 0.9624,
21 |     "precision": 0.9667,
22 |     "recall": 0.9625,
23 |     "avg_loss": 0.0966,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/NS-Instruments.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.5996,
 4 |     "f1_score": 0.6199,
 5 |     "precision": 0.6246,
 6 |     "recall": 0.6578,
 7 |     "avg_loss": 1.6584,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.574,
12 |     "f1_score": 0.584,
13 |     "precision": 0.6134,
14 |     "recall": 0.6306,
15 |     "avg_loss": 1.6444,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.6438,
20 |     "f1_score": 0.6417,
21 |     "precision": 0.6403,
22 |     "recall": 0.6877,
23 |     "avg_loss": 1.4087,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/TUT2017-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7796,
 4 |     "f1_score": 0.7779,
 5 |     "precision": 0.7881,
 6 |     "recall": 0.7796,
 7 |     "avg_loss": 0.6173,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7217,
12 |     "f1_score": 0.7193,
13 |     "precision": 0.7555,
14 |     "recall": 0.7217,
15 |     "avg_loss": 0.8156,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7496,
20 |     "f1_score": 0.7426,
21 |     "precision": 0.7647,
22 |     "recall": 0.7497,
23 |     "avg_loss": 0.6913,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/UrbanSound8K-FOLD10.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.81,
 4 |     "f1_score": 0.8011,
 5 |     "precision": 0.7986,
 6 |     "recall": 0.8242,
 7 |     "avg_loss": 0.7701,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7252,
12 |     "f1_score": 0.7244,
13 |     "precision": 0.7523,
14 |     "recall": 0.7459,
15 |     "avg_loss": 0.953,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8076,
20 |     "f1_score": 0.8007,
21 |     "precision": 0.8031,
22 |     "recall": 0.8181,
23 |     "avg_loss": 0.7598,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/UrbanSound8K-FOLD9.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7549,
 4 |     "f1_score": 0.7627,
 5 |     "precision": 0.7798,
 6 |     "recall": 0.7789,
 7 |     "avg_loss": 1.1373,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8554,
12 |     "f1_score": 0.8661,
13 |     "precision": 0.874,
14 |     "recall": 0.872,
15 |     "avg_loss": 0.6662,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7659,
20 |     "f1_score": 0.7651,
21 |     "precision": 0.7795,
22 |     "recall": 0.7904,
23 |     "avg_loss": 1.0182,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/Beijing-Opera-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8958,
 4 |     "f1_score": 0.8942,
 5 |     "precision": 0.909,
 6 |     "recall": 0.9006,
 7 |     "avg_loss": 0.2626,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9792,
12 |     "f1_score": 0.9781,
13 |     "precision": 0.9773,
14 |     "recall": 0.9808,
15 |     "avg_loss": 0.0817,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9583,
20 |     "f1_score": 0.9564,
21 |     "precision": 0.9583,
22 |     "recall": 0.9615,
23 |     "avg_loss": 0.162,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/Beijing-Opera-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9574,
 4 |     "f1_score": 0.9564,
 5 |     "precision": 0.9583,
 6 |     "recall": 0.9599,
 7 |     "avg_loss": 0.1015,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9787,
12 |     "f1_score": 0.9799,
13 |     "precision": 0.9821,
14 |     "recall": 0.9792,
15 |     "avg_loss": 0.0963,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9787,
20 |     "f1_score": 0.98,
21 |     "precision": 0.9808,
22 |     "recall": 0.9808,
23 |     "avg_loss": 0.1062,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/Beijing-Opera-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9149,
 4 |     "f1_score": 0.9148,
 5 |     "precision": 0.9286,
 6 |     "recall": 0.9199,
 7 |     "avg_loss": 0.1992,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9362,
12 |     "f1_score": 0.9334,
13 |     "precision": 0.9353,
14 |     "recall": 0.9349,
15 |     "avg_loss": 0.18,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9787,
20 |     "f1_score": 0.9772,
21 |     "precision": 0.9773,
22 |     "recall": 0.9792,
23 |     "avg_loss": 0.0778,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/ESC50-Actions-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9375,
 4 |     "f1_score": 0.937,
 5 |     "precision": 0.9467,
 6 |     "recall": 0.9375,
 7 |     "avg_loss": 0.1948,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.925,
12 |     "f1_score": 0.9235,
13 |     "precision": 0.9356,
14 |     "recall": 0.925,
15 |     "avg_loss": 0.1893,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9375,
20 |     "f1_score": 0.936,
21 |     "precision": 0.9467,
22 |     "recall": 0.9375,
23 |     "avg_loss": 0.1906,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/NS-Instruments.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.5728,
 4 |     "f1_score": 0.6028,
 5 |     "precision": 0.6047,
 6 |     "recall": 0.6526,
 7 |     "avg_loss": 1.7179,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.5562,
12 |     "f1_score": 0.5593,
13 |     "precision": 0.5862,
14 |     "recall": 0.5856,
15 |     "avg_loss": 1.7017,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.6177,
20 |     "f1_score": 0.6153,
21 |     "precision": 0.6119,
22 |     "recall": 0.6627,
23 |     "avg_loss": 1.3542,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/UrbanSound8K-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7789,
 4 |     "f1_score": 0.7811,
 5 |     "precision": 0.7993,
 6 |     "recall": 0.7941,
 7 |     "avg_loss": 0.6321,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7468,
12 |     "f1_score": 0.7454,
13 |     "precision": 0.7532,
14 |     "recall": 0.7691,
15 |     "avg_loss": 0.784,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7732,
20 |     "f1_score": 0.7745,
21 |     "precision": 0.7721,
22 |     "recall": 0.7871,
23 |     "avg_loss": 0.7014,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/UrbanSound8K-FOLD10.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7754,
 4 |     "f1_score": 0.7775,
 5 |     "precision": 0.7839,
 6 |     "recall": 0.7903,
 7 |     "avg_loss": 0.8331,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.6941,
12 |     "f1_score": 0.702,
13 |     "precision": 0.7414,
14 |     "recall": 0.719,
15 |     "avg_loss": 1.1581,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7873,
20 |     "f1_score": 0.7839,
21 |     "precision": 0.7862,
22 |     "recall": 0.8029,
23 |     "avg_loss": 0.7831,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/UrbanSound8K-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7928,
 4 |     "f1_score": 0.7821,
 5 |     "precision": 0.7988,
 6 |     "recall": 0.7979,
 7 |     "avg_loss": 0.7417,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7917,
12 |     "f1_score": 0.7963,
13 |     "precision": 0.8064,
14 |     "recall": 0.8116,
15 |     "avg_loss": 0.6845,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8108,
20 |     "f1_score": 0.8096,
21 |     "precision": 0.8095,
22 |     "recall": 0.8131,
23 |     "avg_loss": 0.6086,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/UrbanSound8K-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7243,
 4 |     "f1_score": 0.7125,
 5 |     "precision": 0.7168,
 6 |     "recall": 0.7242,
 7 |     "avg_loss": 0.926,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.6995,
12 |     "f1_score": 0.7023,
13 |     "precision": 0.7035,
14 |     "recall": 0.7081,
15 |     "avg_loss": 1.1094,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7341,
20 |     "f1_score": 0.7193,
21 |     "precision": 0.7403,
22 |     "recall": 0.7325,
23 |     "avg_loss": 1.0645,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/UrbanSound8K-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8131,
 4 |     "f1_score": 0.8032,
 5 |     "precision": 0.8217,
 6 |     "recall": 0.8023,
 7 |     "avg_loss": 0.5833,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7273,
12 |     "f1_score": 0.7297,
13 |     "precision": 0.7545,
14 |     "recall": 0.735,
15 |     "avg_loss": 0.9209,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7596,
20 |     "f1_score": 0.7556,
21 |     "precision": 0.7802,
22 |     "recall": 0.7483,
23 |     "avg_loss": 0.8186,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/UrbanSound8K-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7917,
 4 |     "f1_score": 0.782,
 5 |     "precision": 0.8052,
 6 |     "recall": 0.8015,
 7 |     "avg_loss": 0.6773,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.765,
12 |     "f1_score": 0.7591,
13 |     "precision": 0.7727,
14 |     "recall": 0.7773,
15 |     "avg_loss": 0.7294,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8184,
20 |     "f1_score": 0.8211,
21 |     "precision": 0.8255,
22 |     "recall": 0.8275,
23 |     "avg_loss": 0.609,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/UrbanSound8K-FOLD6.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6865,
 4 |     "f1_score": 0.672,
 5 |     "precision": 0.6791,
 6 |     "recall": 0.7058,
 7 |     "avg_loss": 1.3617,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7217,
12 |     "f1_score": 0.7164,
13 |     "precision": 0.7172,
14 |     "recall": 0.7301,
15 |     "avg_loss": 1.0431,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7193,
20 |     "f1_score": 0.7102,
21 |     "precision": 0.7213,
22 |     "recall": 0.7253,
23 |     "avg_loss": 1.3693,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/UrbanSound8K-FOLD7.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7446,
 4 |     "f1_score": 0.7334,
 5 |     "precision": 0.7425,
 6 |     "recall": 0.7478,
 7 |     "avg_loss": 0.7803,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7566,
12 |     "f1_score": 0.7427,
13 |     "precision": 0.7394,
14 |     "recall": 0.7714,
15 |     "avg_loss": 0.7967,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7041,
20 |     "f1_score": 0.6876,
21 |     "precision": 0.6949,
22 |     "recall": 0.7179,
23 |     "avg_loss": 0.9885,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/UrbanSound8K-FOLD8.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7109,
 4 |     "f1_score": 0.7294,
 5 |     "precision": 0.7394,
 6 |     "recall": 0.7349,
 7 |     "avg_loss": 1.0237,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.6737,
12 |     "f1_score": 0.6752,
13 |     "precision": 0.6923,
14 |     "recall": 0.7022,
15 |     "avg_loss": 1.0644,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7494,
20 |     "f1_score": 0.7349,
21 |     "precision": 0.7359,
22 |     "recall": 0.7768,
23 |     "avg_loss": 0.9744,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/UrbanSound8K-FOLD9.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7819,
 4 |     "f1_score": 0.804,
 5 |     "precision": 0.817,
 6 |     "recall": 0.8087,
 7 |     "avg_loss": 0.9417,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8015,
12 |     "f1_score": 0.8127,
13 |     "precision": 0.8125,
14 |     "recall": 0.8252,
15 |     "avg_loss": 0.7012,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.81,
20 |     "f1_score": 0.8133,
21 |     "precision": 0.8281,
22 |     "recall": 0.8267,
23 |     "avg_loss": 0.8716,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/ESC50-Actions-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9375,
 4 |     "f1_score": 0.9368,
 5 |     "precision": 0.9556,
 6 |     "recall": 0.9375,
 7 |     "avg_loss": 0.5431,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9875,
12 |     "f1_score": 0.9875,
13 |     "precision": 0.9889,
14 |     "recall": 0.9875,
15 |     "avg_loss": 0.05,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9625,
20 |     "f1_score": 0.9614,
21 |     "precision": 0.9667,
22 |     "recall": 0.9625,
23 |     "avg_loss": 0.1677,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/ESC50-Actions-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9625,
 4 |     "f1_score": 0.9621,
 5 |     "precision": 0.9689,
 6 |     "recall": 0.9625,
 7 |     "avg_loss": 0.2894,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9125,
12 |     "f1_score": 0.9113,
13 |     "precision": 0.9288,
14 |     "recall": 0.9125,
15 |     "avg_loss": 0.4741,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.975,
20 |     "f1_score": 0.9746,
21 |     "precision": 0.98,
22 |     "recall": 0.975,
23 |     "avg_loss": 0.2188,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/UrbanSound8K-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8133,
 4 |     "f1_score": 0.8277,
 5 |     "precision": 0.8349,
 6 |     "recall": 0.8318,
 7 |     "avg_loss": 1.4857,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8373,
12 |     "f1_score": 0.8339,
13 |     "precision": 0.8351,
14 |     "recall": 0.8554,
15 |     "avg_loss": 1.4359,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7973,
20 |     "f1_score": 0.8035,
21 |     "precision": 0.8055,
22 |     "recall": 0.8186,
23 |     "avg_loss": 1.2868,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/UrbanSound8K-FOLD10.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8435,
 4 |     "f1_score": 0.8489,
 5 |     "precision": 0.8482,
 6 |     "recall": 0.8583,
 7 |     "avg_loss": 1.0822,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7384,
12 |     "f1_score": 0.7407,
13 |     "precision": 0.7672,
14 |     "recall": 0.7564,
15 |     "avg_loss": 2.1358,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8124,
20 |     "f1_score": 0.8161,
21 |     "precision": 0.8294,
22 |     "recall": 0.8302,
23 |     "avg_loss": 1.544,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/UrbanSound8K-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8041,
 4 |     "f1_score": 0.8121,
 5 |     "precision": 0.8225,
 6 |     "recall": 0.8243,
 7 |     "avg_loss": 1.8437,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8435,
12 |     "f1_score": 0.8508,
13 |     "precision": 0.8504,
14 |     "recall": 0.8615,
15 |     "avg_loss": 0.9204,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8559,
20 |     "f1_score": 0.8643,
21 |     "precision": 0.8756,
22 |     "recall": 0.8661,
23 |     "avg_loss": 1.2119,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/UrbanSound8K-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7395,
 4 |     "f1_score": 0.7352,
 5 |     "precision": 0.7541,
 6 |     "recall": 0.7428,
 7 |     "avg_loss": 1.9464,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7438,
12 |     "f1_score": 0.7564,
13 |     "precision": 0.7773,
14 |     "recall": 0.7535,
15 |     "avg_loss": 2.3971,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7146,
20 |     "f1_score": 0.7071,
21 |     "precision": 0.7215,
22 |     "recall": 0.7158,
23 |     "avg_loss": 2.7911,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/UrbanSound8K-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8687,
 4 |     "f1_score": 0.8619,
 5 |     "precision": 0.8801,
 6 |     "recall": 0.8571,
 7 |     "avg_loss": 0.9007,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8384,
12 |     "f1_score": 0.8364,
13 |     "precision": 0.8466,
14 |     "recall": 0.8365,
15 |     "avg_loss": 1.2304,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8485,
20 |     "f1_score": 0.8425,
21 |     "precision": 0.8637,
22 |     "recall": 0.8399,
23 |     "avg_loss": 1.466,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/UrbanSound8K-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8419,
 4 |     "f1_score": 0.8357,
 5 |     "precision": 0.8597,
 6 |     "recall": 0.8506,
 7 |     "avg_loss": 1.1243,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8697,
12 |     "f1_score": 0.8677,
13 |     "precision": 0.8678,
14 |     "recall": 0.8766,
15 |     "avg_loss": 0.967,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8291,
20 |     "f1_score": 0.8319,
21 |     "precision": 0.8485,
22 |     "recall": 0.8374,
23 |     "avg_loss": 0.9734,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/UrbanSound8K-FOLD6.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.819,
 4 |     "f1_score": 0.8182,
 5 |     "precision": 0.8215,
 6 |     "recall": 0.8282,
 7 |     "avg_loss": 3.1229,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7789,
12 |     "f1_score": 0.7856,
13 |     "precision": 0.7962,
14 |     "recall": 0.7885,
15 |     "avg_loss": 3.9193,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7485,
20 |     "f1_score": 0.7447,
21 |     "precision": 0.7539,
22 |     "recall": 0.7571,
23 |     "avg_loss": 3.7871,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/UrbanSound8K-FOLD7.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7721,
 4 |     "f1_score": 0.7742,
 5 |     "precision": 0.7832,
 6 |     "recall": 0.7823,
 7 |     "avg_loss": 2.3182,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7995,
12 |     "f1_score": 0.7954,
13 |     "precision": 0.7974,
14 |     "recall": 0.8059,
15 |     "avg_loss": 1.4955,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8162,
20 |     "f1_score": 0.8171,
21 |     "precision": 0.8159,
22 |     "recall": 0.824,
23 |     "avg_loss": 1.8344,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/UrbanSound8K-FOLD8.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8102,
 4 |     "f1_score": 0.8298,
 5 |     "precision": 0.8463,
 6 |     "recall": 0.8329,
 7 |     "avg_loss": 1.1996,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7233,
12 |     "f1_score": 0.7308,
13 |     "precision": 0.7358,
14 |     "recall": 0.7516,
15 |     "avg_loss": 2.7501,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7866,
20 |     "f1_score": 0.8035,
21 |     "precision": 0.8115,
22 |     "recall": 0.808,
23 |     "avg_loss": 1.2759,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/UrbanSound8K-FOLD9.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8076,
 4 |     "f1_score": 0.8141,
 5 |     "precision": 0.8201,
 6 |     "recall": 0.835,
 7 |     "avg_loss": 2.1535,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.864,
12 |     "f1_score": 0.8735,
13 |     "precision": 0.8773,
14 |     "recall": 0.8841,
15 |     "avg_loss": 1.5268,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8652,
20 |     "f1_score": 0.8707,
21 |     "precision": 0.8723,
22 |     "recall": 0.8877,
23 |     "avg_loss": 1.598,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/Beijing-Opera-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9574,
 4 |     "f1_score": 0.9564,
 5 |     "precision": 0.9583,
 6 |     "recall": 0.9599,
 7 |     "avg_loss": 0.0868,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9787,
12 |     "f1_score": 0.9772,
13 |     "precision": 0.9773,
14 |     "recall": 0.9792,
15 |     "avg_loss": 0.0396,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9787,
20 |     "f1_score": 0.9772,
21 |     "precision": 0.9773,
22 |     "recall": 0.9792,
23 |     "avg_loss": 0.0403,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/UrbanSound8K-FOLD1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7537,
 4 |     "f1_score": 0.7596,
 5 |     "precision": 0.7849,
 6 |     "recall": 0.7752,
 7 |     "avg_loss": 0.7558,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7526,
12 |     "f1_score": 0.751,
13 |     "precision": 0.7728,
14 |     "recall": 0.7795,
15 |     "avg_loss": 0.8645,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7388,
20 |     "f1_score": 0.7397,
21 |     "precision": 0.7537,
22 |     "recall": 0.7655,
23 |     "avg_loss": 0.7558,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/UrbanSound8K-FOLD2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7489,
 4 |     "f1_score": 0.7506,
 5 |     "precision": 0.7562,
 6 |     "recall": 0.7753,
 7 |     "avg_loss": 0.9968,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8637,
12 |     "f1_score": 0.8643,
13 |     "precision": 0.8656,
14 |     "recall": 0.8715,
15 |     "avg_loss": 0.5291,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8074,
20 |     "f1_score": 0.8016,
21 |     "precision": 0.8046,
22 |     "recall": 0.8087,
23 |     "avg_loss": 0.6422,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/UrbanSound8K-FOLD3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6714,
 4 |     "f1_score": 0.6635,
 5 |     "precision": 0.6838,
 6 |     "recall": 0.6878,
 7 |     "avg_loss": 1.0453,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7492,
12 |     "f1_score": 0.7517,
13 |     "precision": 0.7647,
14 |     "recall": 0.7504,
15 |     "avg_loss": 0.9332,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7027,
20 |     "f1_score": 0.6934,
21 |     "precision": 0.6926,
22 |     "recall": 0.7052,
23 |     "avg_loss": 0.9017,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/UrbanSound8K-FOLD4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7889,
 4 |     "f1_score": 0.7781,
 5 |     "precision": 0.7937,
 6 |     "recall": 0.781,
 7 |     "avg_loss": 0.6893,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7313,
12 |     "f1_score": 0.7134,
13 |     "precision": 0.7518,
14 |     "recall": 0.7299,
15 |     "avg_loss": 1.1393,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7586,
20 |     "f1_score": 0.7523,
21 |     "precision": 0.7693,
22 |     "recall": 0.7494,
23 |     "avg_loss": 0.8274,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/UrbanSound8K-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8408,
 4 |     "f1_score": 0.8328,
 5 |     "precision": 0.8447,
 6 |     "recall": 0.8505,
 7 |     "avg_loss": 0.5257,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7788,
12 |     "f1_score": 0.7589,
13 |     "precision": 0.7926,
14 |     "recall": 0.7887,
15 |     "avg_loss": 0.7447,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.8109,
20 |     "f1_score": 0.8168,
21 |     "precision": 0.8254,
22 |     "recall": 0.8221,
23 |     "avg_loss": 0.5836,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/UrbanSound8K-FOLD6.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7886,
 4 |     "f1_score": 0.7851,
 5 |     "precision": 0.7945,
 6 |     "recall": 0.7998,
 7 |     "avg_loss": 0.855,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7327,
12 |     "f1_score": 0.7281,
13 |     "precision": 0.7453,
14 |     "recall": 0.7462,
15 |     "avg_loss": 1.6305,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.6902,
20 |     "f1_score": 0.6733,
21 |     "precision": 0.6809,
22 |     "recall": 0.6929,
23 |     "avg_loss": 1.2978,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/UrbanSound8K-FOLD7.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.7303,
 4 |     "f1_score": 0.7184,
 5 |     "precision": 0.7584,
 6 |     "recall": 0.7424,
 7 |     "avg_loss": 0.9887,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8126,
12 |     "f1_score": 0.8128,
13 |     "precision": 0.8131,
14 |     "recall": 0.8244,
15 |     "avg_loss": 0.7321,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7792,
20 |     "f1_score": 0.7775,
21 |     "precision": 0.7745,
22 |     "recall": 0.7965,
23 |     "avg_loss": 0.7008,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/cocoop/UrbanSound8K-FOLD8.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.6886,
 4 |     "f1_score": 0.7292,
 5 |     "precision": 0.7442,
 6 |     "recall": 0.7249,
 7 |     "avg_loss": 1.1906,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.7829,
12 |     "f1_score": 0.7703,
13 |     "precision": 0.7714,
14 |     "recall": 0.8049,
15 |     "avg_loss": 0.8706,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.7357,
20 |     "f1_score": 0.7342,
21 |     "precision": 0.7368,
22 |     "recall": 0.7631,
23 |     "avg_loss": 1.0355,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/coop/Beijing-Opera-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.8936,
 4 |     "f1_score": 0.8892,
 5 |     "precision": 0.9184,
 6 |     "recall": 0.8899,
 7 |     "avg_loss": 0.2399,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.9574,
12 |     "f1_score": 0.9564,
13 |     "precision": 0.9583,
14 |     "recall": 0.9615,
15 |     "avg_loss": 0.1499,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9362,
20 |     "f1_score": 0.9343,
21 |     "precision": 0.9337,
22 |     "recall": 0.9365,
23 |     "avg_loss": 0.1762,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/logs/palm/Beijing-Opera-FOLD5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "seed_0": {
 3 |     "accuracy": 0.9574,
 4 |     "f1_score": 0.9571,
 5 |     "precision": 0.9667,
 6 |     "recall": 0.9545,
 7 |     "avg_loss": 0.1089,
 8 |     "epoch": 49
 9 |   },
10 |   "seed_1": {
11 |     "accuracy": 0.8298,
12 |     "f1_score": 0.8293,
13 |     "precision": 0.8681,
14 |     "recall": 0.8299,
15 |     "avg_loss": 0.7705,
16 |     "epoch": 49
17 |   },
18 |   "seed_2": {
19 |     "accuracy": 0.9574,
20 |     "f1_score": 0.9557,
21 |     "precision": 0.9571,
22 |     "recall": 0.9558,
23 |     "avg_loss": 0.0579,
24 |     "epoch": 49
25 |   }
26 | }


--------------------------------------------------------------------------------
/pengi/configs/base.yml:
--------------------------------------------------------------------------------
 1 | # TEXT ENCODER CONFIG (base_no_text_enc.yml is this same file with use_text_model set to False)
 2 | use_text_model: True
 3 | text_model: 'openai/clip-vit-base-patch16'  # looks like a Hugging Face hub model id; confirm how the loader resolves it
 4 | transformer_embed_dim: 512
 5 | freeze_text_encoder_weights: True
 6 | use_pretrained_clap_weights: False
 7 | 
 8 | # AUDIO ENCODER CONFIG
 9 | audioenc_name: 'HTSAT'
10 | out_emb: 768
11 | fmin: 50  # presumably Hz (lower mel band edge) - TODO confirm
12 | fmax: 8000  # presumably Hz (upper mel band edge) - TODO confirm
13 | n_fft: 1024
14 | hop_size: 320
15 | mel_bins: 64
16 | window_size: 1024
17 | specaug: False
18 | mixup: False
19 | use_pretrained_audioencoder: False
20 | freeze_audio_encoder_weights: False
21 | 
22 | # CLAP PROJECTION SPACE CONFIG
23 | d_proj: 1024
24 | 
25 | # DATASET CONFIGS
26 | dataset_config: 
27 |   sampling_rate: 44100  # presumably Hz - TODO confirm against the audio loader
28 |   duration: 7  # presumably seconds of audio per clip - TODO confirm
29 |   enc_text_len: 40  # NOTE(review): same value as prefix_length / prefix_length_clip below; confirm whether they must stay in sync
30 |   dec_text_len: 77
31 | 
32 | # DECODER CONFIG
33 | text_decoder: 'gpt2'
34 | prefix_length: 40
35 | prefix_length_clip: 40
36 | mapping_type: 'transformer'
37 | num_layers: 8
38 | normalize_prefix: True
39 | freeze_gpt_weights: True


--------------------------------------------------------------------------------
/pengi/configs/base_no_text_enc.yml:
--------------------------------------------------------------------------------
 1 | # TEXT ENCODER CONFIG (this file matches base.yml except that use_text_model is False)
 2 | use_text_model: False
 3 | text_model: 'openai/clip-vit-base-patch16'  # NOTE(review): still set although use_text_model is False; confirm whether it is read at all
 4 | transformer_embed_dim: 512
 5 | freeze_text_encoder_weights: True
 6 | use_pretrained_clap_weights: False
 7 | 
 8 | # AUDIO ENCODER CONFIG
 9 | audioenc_name: 'HTSAT'
10 | out_emb: 768
11 | fmin: 50  # presumably Hz (lower mel band edge) - TODO confirm
12 | fmax: 8000  # presumably Hz (upper mel band edge) - TODO confirm
13 | n_fft: 1024
14 | hop_size: 320
15 | mel_bins: 64
16 | window_size: 1024
17 | specaug: False
18 | mixup: False
19 | use_pretrained_audioencoder: False
20 | freeze_audio_encoder_weights: False
21 | 
22 | # CLAP PROJECTION SPACE CONFIG
23 | d_proj: 1024
24 | 
25 | # DATASET CONFIGS
26 | dataset_config: 
27 |   sampling_rate: 44100  # presumably Hz - TODO confirm against the audio loader
28 |   duration: 7  # presumably seconds of audio per clip - TODO confirm
29 |   enc_text_len: 40  # NOTE(review): same value as prefix_length / prefix_length_clip below; confirm whether they must stay in sync
30 |   dec_text_len: 77
31 | 
32 | # DECODER CONFIG
33 | text_decoder: 'gpt2'
34 | prefix_length: 40
35 | prefix_length_clip: 40
36 | mapping_type: 'transformer'
37 | num_layers: 8
38 | normalize_prefix: True
39 | freeze_gpt_weights: True


--------------------------------------------------------------------------------
/scripts/run_all_datasets_all_methods.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | for MODEL in zeroshot coop cocoop palm;  do  sh scripts/beijing_opera.sh $MODEL;   done
 3 | for MODEL in zeroshot coop cocoop palm;  do  sh scripts/crema_d.sh $MODEL;   done
 4 | for MODEL in zeroshot coop cocoop palm;  do  sh scripts/esc50_actions.sh $MODEL;   done
 5 | for MODEL in zeroshot coop cocoop palm;  do  sh scripts/esc50.sh $MODEL;   done
 6 | for MODEL in zeroshot coop cocoop palm;  do  sh scripts/gt_music_genre.sh $MODEL;   done
 7 | for MODEL in zeroshot coop cocoop palm;  do  sh scripts/ns_instruments.sh $MODEL;   done
 8 | for MODEL in zeroshot coop cocoop palm;  do  sh scripts/ravdess.sh $MODEL;   done
 9 | for MODEL in zeroshot coop cocoop palm;  do  sh scripts/sesa.sh $MODEL;   done
10 | for MODEL in zeroshot coop cocoop palm;  do  sh scripts/tut.sh $MODEL;   done
11 | for MODEL in zeroshot coop cocoop palm;  do  sh scripts/urban_sound.sh $MODEL;   done
12 | for MODEL in zeroshot coop cocoop palm;  do  sh scripts/vocal_sound.sh $MODEL;   done


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Asif Hanif
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/logs/coop/accuracy.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "Beijing-Opera": [
 3 |     0.9323,
 4 |     0.966,
 5 |     0.9619,
 6 |     0.9533999999999999
 7 |   ],
 8 |   "CREMA-D": [
 9 |     0.313,
10 |     0.4197,
11 |     0.276,
12 |     0.3362333333333334
13 |   ],
14 |   "ESC50-Actions": [
15 |     0.9625,
16 |     0.94,
17 |     0.955,
18 |     0.9525
19 |   ],
20 |   "ESC50": [
21 |     0.941,
22 |     0.939,
23 |     0.9345,
24 |     0.9381666666666666
25 |   ],
26 |   "GT-Music-Genre": [
27 |     0.725,
28 |     0.695,
29 |     0.735,
30 |     0.7183333333333333
31 |   ],
32 |   "NS-Instruments": [
33 |     0.5728,
34 |     0.5562,
35 |     0.6177,
36 |     0.5822333333333334
37 |   ],
38 |   "RAVDESS": [
39 |     0.3849,
40 |     0.2688,
41 |     0.3422,
42 |     0.3319666666666667
43 |   ],
44 |   "SESA": [
45 |     0.9143,
46 |     0.8952,
47 |     0.8762,
48 |     0.8952333333333332
49 |   ],
50 |   "TUT2017": [
51 |     0.6391,
52 |     0.6667,
53 |     0.6525,
54 |     0.6527666666666666
55 |   ],
56 |   "UrbanSound8K": [
57 |     0.76,
58 |     0.7378,
59 |     0.7666,
60 |     0.7548
61 |   ],
62 |   "VocalSound": [
63 |     0.7162,
64 |     0.7485,
65 |     0.6642,
66 |     0.7096333333333334
67 |   ]
68 | }


--------------------------------------------------------------------------------
/logs/palm/accuracy.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "Beijing-Opera": [
 3 |     0.9747,
 4 |     0.9066,
 5 |     0.9787,
 6 |     0.9533333333333333
 7 |   ],
 8 |   "CREMA-D": [
 9 |     0.4453,
10 |     0.358,
11 |     0.2344,
12 |     0.34589999999999993
13 |   ],
14 |   "ESC50-Actions": [
15 |     0.97,
16 |     0.9625,
17 |     0.965,
18 |     0.9658333333333333
19 |   ],
20 |   "ESC50": [
21 |     0.956,
22 |     0.96,
23 |     0.962,
24 |     0.9593333333333334
25 |   ],
26 |   "GT-Music-Genre": [
27 |     0.79,
28 |     0.785,
29 |     0.825,
30 |     0.8000000000000002
31 |   ],
32 |   "NS-Instruments": [
33 |     0.6394,
34 |     0.6108,
35 |     0.6648,
36 |     0.6383333333333333
37 |   ],
38 |   "RAVDESS": [
39 |     0.4562,
40 |     0.4603,
41 |     0.4623,
42 |     0.4596
43 |   ],
44 |   "SESA": [
45 |     0.8857,
46 |     0.9143,
47 |     0.8857,
48 |     0.8952333333333334
49 |   ],
50 |   "TUT2017": [
51 |     0.7959,
52 |     0.8047,
53 |     0.7729,
54 |     0.7911666666666667
55 |   ],
56 |   "UrbanSound8K": [
57 |     0.812,
58 |     0.8037,
59 |     0.8074,
60 |     0.8077
61 |   ],
62 |   "VocalSound": [
63 |     0.8101,
64 |     0.8168,
65 |     0.7964,
66 |     0.8077666666666667
67 |   ]
68 | }


--------------------------------------------------------------------------------
/logs/cocoop/accuracy.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "Beijing-Opera": [
 3 |     0.9577,
 4 |     0.983,
 5 |     0.9916,
 6 |     0.9774333333333334
 7 |   ],
 8 |   "CREMA-D": [
 9 |     0.2539,
10 |     0.3358,
11 |     0.3156,
12 |     0.3017666666666667
13 |   ],
14 |   "ESC50-Actions": [
15 |     0.97,
16 |     0.9625,
17 |     0.965,
18 |     0.9658333333333333
19 |   ],
20 |   "ESC50": [
21 |     0.946,
22 |     0.937,
23 |     0.945,
24 |     0.9426666666666667
25 |   ],
26 |   "GT-Music-Genre": [
27 |     0.75,
28 |     0.745,
29 |     0.76,
30 |     0.7516666666666666
31 |   ],
32 |   "NS-Instruments": [
33 |     0.5996,
34 |     0.574,
35 |     0.6438,
36 |     0.6058
37 |   ],
38 |   "RAVDESS": [
39 |     0.3727,
40 |     0.4399,
41 |     0.3523,
42 |     0.38830000000000003
43 |   ],
44 |   "SESA": [
45 |     0.8381,
46 |     0.8762,
47 |     0.8952,
48 |     0.8698333333333332
49 |   ],
50 |   "TUT2017": [
51 |     0.7499,
52 |     0.7215,
53 |     0.7312,
54 |     0.7342
55 |   ],
56 |   "UrbanSound8K": [
57 |     0.7576,
58 |     0.7784,
59 |     0.7597,
60 |     0.7652333333333333
61 |   ],
62 |   "VocalSound": [
63 |     0.8081,
64 |     0.7825,
65 |     0.7463,
66 |     0.7789666666666667
67 |   ]
68 | }


--------------------------------------------------------------------------------
/logs/coop/f1_score.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "Beijing-Opera": [
 3 |     0.9309,
 4 |     0.965,
 5 |     0.9605,
 6 |     0.9521333333333333
 7 |   ],
 8 |   "CREMA-D": [
 9 |     0.2609,
10 |     0.2745,
11 |     0.2389,
12 |     0.25810000000000005
13 |   ],
14 |   "ESC50-Actions": [
15 |     0.9618,
16 |     0.939,
17 |     0.9532,
18 |     0.9513333333333334
19 |   ],
20 |   "ESC50": [
21 |     0.9399,
22 |     0.9369,
23 |     0.9335,
24 |     0.9367666666666666
25 |   ],
26 |   "GT-Music-Genre": [
27 |     0.719,
28 |     0.6937,
29 |     0.7153,
30 |     0.7093333333333334
31 |   ],
32 |   "NS-Instruments": [
33 |     0.6028,
34 |     0.5593,
35 |     0.6153,
36 |     0.5924666666666667
37 |   ],
38 |   "RAVDESS": [
39 |     0.3668,
40 |     0.2385,
41 |     0.3075,
42 |     0.30426666666666663
43 |   ],
44 |   "SESA": [
45 |     0.9044,
46 |     0.8959,
47 |     0.8639,
48 |     0.8880666666666667
49 |   ],
50 |   "TUT2017": [
51 |     0.6315,
52 |     0.6596,
53 |     0.6409,
54 |     0.644
55 |   ],
56 |   "UrbanSound8K": [
57 |     0.7577,
58 |     0.7382,
59 |     0.761,
60 |     0.7523
61 |   ],
62 |   "VocalSound": [
63 |     0.6953,
64 |     0.7481,
65 |     0.6606,
66 |     0.7013333333333334
67 |   ]
68 | }


--------------------------------------------------------------------------------
/logs/palm/f1_score.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "Beijing-Opera": [
 3 |     0.9738,
 4 |     0.9046,
 5 |     0.9786,
 6 |     0.9523333333333334
 7 |   ],
 8 |   "CREMA-D": [
 9 |     0.3277,
10 |     0.3083,
11 |     0.199,
12 |     0.2783333333333333
13 |   ],
14 |   "ESC50-Actions": [
15 |     0.9698,
16 |     0.962,
17 |     0.9644,
18 |     0.9653999999999999
19 |   ],
20 |   "ESC50": [
21 |     0.9544,
22 |     0.9591,
23 |     0.9614,
24 |     0.9583
25 |   ],
26 |   "GT-Music-Genre": [
27 |     0.7914,
28 |     0.7888,
29 |     0.8257,
30 |     0.8019666666666666
31 |   ],
32 |   "NS-Instruments": [
33 |     0.6418,
34 |     0.6062,
35 |     0.6834,
36 |     0.6438
37 |   ],
38 |   "RAVDESS": [
39 |     0.4486,
40 |     0.4718,
41 |     0.4435,
42 |     0.4546333333333334
43 |   ],
44 |   "SESA": [
45 |     0.8955,
46 |     0.9197,
47 |     0.8805,
48 |     0.8985666666666666
49 |   ],
50 |   "TUT2017": [
51 |     0.7942,
52 |     0.8024,
53 |     0.772,
54 |     0.7895333333333333
55 |   ],
56 |   "UrbanSound8K": [
57 |     0.8158,
58 |     0.8071,
59 |     0.8101,
60 |     0.8109999999999999
61 |   ],
62 |   "VocalSound": [
63 |     0.8107,
64 |     0.8179,
65 |     0.7972,
66 |     0.8086000000000001
67 |   ]
68 | }


--------------------------------------------------------------------------------
/logs/cocoop/f1_score.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "Beijing-Opera": [
 3 |     0.9556,
 4 |     0.9824,
 5 |     0.991,
 6 |     0.9763333333333334
 7 |   ],
 8 |   "CREMA-D": [
 9 |     0.2267,
10 |     0.2681,
11 |     0.2851,
12 |     0.2599666666666667
13 |   ],
14 |   "ESC50-Actions": [
15 |     0.9703,
16 |     0.9621,
17 |     0.9648,
18 |     0.9657333333333332
19 |   ],
20 |   "ESC50": [
21 |     0.9448,
22 |     0.9351,
23 |     0.9431,
24 |     0.9410000000000002
25 |   ],
26 |   "GT-Music-Genre": [
27 |     0.7383,
28 |     0.7417,
29 |     0.7623,
30 |     0.7474333333333334
31 |   ],
32 |   "NS-Instruments": [
33 |     0.6199,
34 |     0.584,
35 |     0.6417,
36 |     0.6152000000000001
37 |   ],
38 |   "RAVDESS": [
39 |     0.3634,
40 |     0.4351,
41 |     0.3436,
42 |     0.38070000000000004
43 |   ],
44 |   "SESA": [
45 |     0.8425,
46 |     0.8821,
47 |     0.8847,
48 |     0.8697666666666667
49 |   ],
50 |   "TUT2017": [
51 |     0.7474,
52 |     0.7198,
53 |     0.7246,
54 |     0.7306
55 |   ],
56 |   "UrbanSound8K": [
57 |     0.7581,
58 |     0.7741,
59 |     0.7555,
60 |     0.7625666666666667
61 |   ],
62 |   "VocalSound": [
63 |     0.8054,
64 |     0.7759,
65 |     0.7451,
66 |     0.7754666666666666
67 |   ]
68 | }


--------------------------------------------------------------------------------
/scripts/sesa.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs one method on the SESA dataset, once per seed.
 3 | # Usage (from the directory containing main.py): sh scripts/sesa.sh <zeroshot|coop|cocoop|palm>
 4 | DATASET="SESA"
 5 | METHOD=$1
 6 | 
 7 | # Reject a missing or unsupported method before doing any work.
 8 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
 9 |     echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
10 |     exit 1
11 | fi
12 | 
13 | echo "Running METHOD=$METHOD on DATASET=$DATASET"
14 | 
15 | # Replace the <...> placeholder with the directory that contains Audio-Datasets/.
16 | DATASET_ROOT="<SET_PATH_TO_DATASET_ROOT_DIRECTORY_HERE>/Audio-Datasets/$DATASET"
17 | 
18 | # Abort when the dataset directory is missing; continuing would only launch runs that cannot find their data.
19 | if [ -d "$DATASET_ROOT" ]; then
20 |     echo "Dataset path exists: $DATASET_ROOT"
21 | else
22 |     echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
23 |     exit 1
24 | fi
25 | 
26 | # Value passed to --ctx_dim: 512 for coop/cocoop, 1024 for zeroshot/palm.
27 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
28 |     CTX_DIM=512
29 | else
30 |     CTX_DIM=1024
31 | fi
32 | 
33 | # zeroshot is run with seed 0 only; every other method is run with seeds 0, 1 and 2.
34 | if [ "$METHOD" = "zeroshot" ]; then
35 |     SEEDS=0
36 | else
37 |     SEEDS="0 1 2"
38 | fi
39 | 
40 | # $SEEDS is deliberately unquoted so it splits into one word per seed.
41 | # All other expansions are quoted so a dataset path containing spaces stays a single argument.
42 | for SEED in $SEEDS
43 |     do
44 |         python main.py \
45 |             --model_name "$METHOD" \
46 |             --dataset_root "$DATASET_ROOT" \
47 |             --n_epochs 50 \
48 |             --freq_test_model 10 \
49 |             --ctx_dim "$CTX_DIM" \
50 |             --batch_size 16 \
51 |             --lr 0.05 \
52 |             --seed "$SEED" \
53 |             --exp_name "$DATASET" \
54 |             --num_shots 16 \
55 |             --do_logging
56 |     done


--------------------------------------------------------------------------------
/scripts/crema_d.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs one method on the CREMA-D dataset, once per seed.
 3 | # Usage (from the directory containing main.py): sh scripts/crema_d.sh <zeroshot|coop|cocoop|palm>
 4 | DATASET="CREMA-D"
 5 | METHOD=$1
 6 | 
 7 | # Reject a missing or unsupported method before doing any work.
 8 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
 9 |     echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
10 |     exit 1
11 | fi
12 | 
13 | echo "Running METHOD=$METHOD on DATASET=$DATASET"
14 | 
15 | # Replace the <...> placeholder with the directory that contains Audio-Datasets/.
16 | DATASET_ROOT="<SET_PATH_TO_DATASET_ROOT_DIRECTORY_HERE>/Audio-Datasets/$DATASET"
17 | 
18 | # Abort when the dataset directory is missing; continuing would only launch runs that cannot find their data.
19 | if [ -d "$DATASET_ROOT" ]; then
20 |     echo "Dataset path exists: $DATASET_ROOT"
21 | else
22 |     echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
23 |     exit 1
24 | fi
25 | 
26 | # Value passed to --ctx_dim: 512 for coop/cocoop, 1024 for zeroshot/palm.
27 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
28 |     CTX_DIM=512
29 | else
30 |     CTX_DIM=1024
31 | fi
32 | 
33 | # zeroshot is run with seed 0 only; every other method is run with seeds 0, 1 and 2.
34 | if [ "$METHOD" = "zeroshot" ]; then
35 |     SEEDS=0
36 | else
37 |     SEEDS="0 1 2"
38 | fi
39 | 
40 | # $SEEDS is deliberately unquoted so it splits into one word per seed.
41 | # All other expansions are quoted so a dataset path containing spaces stays a single argument.
42 | for SEED in $SEEDS
43 |     do
44 |         python main.py \
45 |             --model_name "$METHOD" \
46 |             --dataset_root "$DATASET_ROOT" \
47 |             --n_epochs 50 \
48 |             --freq_test_model 10 \
49 |             --ctx_dim "$CTX_DIM" \
50 |             --batch_size 16 \
51 |             --lr 0.05 \
52 |             --seed "$SEED" \
53 |             --exp_name "$DATASET" \
54 |             --num_shots 16 \
55 |             --do_logging
56 |     done


--------------------------------------------------------------------------------
/scripts/ravdess.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs one method on the RAVDESS dataset, once per seed.
 3 | # Usage (from the directory containing main.py): sh scripts/ravdess.sh <zeroshot|coop|cocoop|palm>
 4 | DATASET="RAVDESS"
 5 | METHOD=$1
 6 | 
 7 | # Reject a missing or unsupported method before doing any work.
 8 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
 9 |     echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
10 |     exit 1
11 | fi
12 | 
13 | echo "Running METHOD=$METHOD on DATASET=$DATASET"
14 | 
15 | # Replace the <...> placeholder with the directory that contains Audio-Datasets/.
16 | DATASET_ROOT="<SET_PATH_TO_DATASET_ROOT_DIRECTORY_HERE>/Audio-Datasets/$DATASET"
17 | 
18 | # Abort when the dataset directory is missing; continuing would only launch runs that cannot find their data.
19 | if [ -d "$DATASET_ROOT" ]; then
20 |     echo "Dataset path exists: $DATASET_ROOT"
21 | else
22 |     echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
23 |     exit 1
24 | fi
25 | 
26 | # Value passed to --ctx_dim: 512 for coop/cocoop, 1024 for zeroshot/palm.
27 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
28 |     CTX_DIM=512
29 | else
30 |     CTX_DIM=1024
31 | fi
32 | 
33 | # zeroshot is run with seed 0 only; every other method is run with seeds 0, 1 and 2.
34 | if [ "$METHOD" = "zeroshot" ]; then
35 |     SEEDS=0
36 | else
37 |     SEEDS="0 1 2"
38 | fi
39 | 
40 | # $SEEDS is deliberately unquoted so it splits into one word per seed.
41 | # All other expansions are quoted so a dataset path containing spaces stays a single argument.
42 | for SEED in $SEEDS
43 |     do
44 |         python main.py \
45 |             --model_name "$METHOD" \
46 |             --dataset_root "$DATASET_ROOT" \
47 |             --n_epochs 50 \
48 |             --freq_test_model 10 \
49 |             --ctx_dim "$CTX_DIM" \
50 |             --batch_size 16 \
51 |             --lr 0.05 \
52 |             --seed "$SEED" \
53 |             --exp_name "$DATASET" \
54 |             --num_shots 16 \
55 |             --do_logging
56 |     done


--------------------------------------------------------------------------------
/scripts/vocal_sound.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs one method on the VocalSound dataset, once per seed.
 3 | # Usage (from the directory containing main.py): sh scripts/vocal_sound.sh <zeroshot|coop|cocoop|palm>
 4 | DATASET="VocalSound"
 5 | METHOD=$1
 6 | 
 7 | # Reject a missing or unsupported method before doing any work.
 8 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
 9 |     echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
10 |     exit 1
11 | fi
12 | 
13 | echo "Running METHOD=$METHOD on DATASET=$DATASET"
14 | 
15 | # Replace the <...> placeholder with the directory that contains Audio-Datasets/.
16 | DATASET_ROOT="<SET_PATH_TO_DATASET_ROOT_DIRECTORY_HERE>/Audio-Datasets/$DATASET"
17 | 
18 | # Abort when the dataset directory is missing; continuing would only launch runs that cannot find their data.
19 | if [ -d "$DATASET_ROOT" ]; then
20 |     echo "Dataset path exists: $DATASET_ROOT"
21 | else
22 |     echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
23 |     exit 1
24 | fi
25 | 
26 | # Value passed to --ctx_dim: 512 for coop/cocoop, 1024 for zeroshot/palm.
27 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
28 |     CTX_DIM=512
29 | else
30 |     CTX_DIM=1024
31 | fi
32 | 
33 | # zeroshot is run with seed 0 only; every other method is run with seeds 0, 1 and 2.
34 | if [ "$METHOD" = "zeroshot" ]; then
35 |     SEEDS=0
36 | else
37 |     SEEDS="0 1 2"
38 | fi
39 | 
40 | # $SEEDS is deliberately unquoted so it splits into one word per seed.
41 | # All other expansions are quoted so a dataset path containing spaces stays a single argument.
42 | for SEED in $SEEDS
43 |     do
44 |         python main.py \
45 |             --model_name "$METHOD" \
46 |             --dataset_root "$DATASET_ROOT" \
47 |             --n_epochs 50 \
48 |             --freq_test_model 10 \
49 |             --ctx_dim "$CTX_DIM" \
50 |             --batch_size 16 \
51 |             --lr 0.05 \
52 |             --seed "$SEED" \
53 |             --exp_name "$DATASET" \
54 |             --num_shots 16 \
55 |             --do_logging
56 |     done


--------------------------------------------------------------------------------
/scripts/gt_music_genre.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs one method on the GT-Music-Genre dataset, once per seed.
 3 | # Usage (from the directory containing main.py): sh scripts/gt_music_genre.sh <zeroshot|coop|cocoop|palm>
 4 | DATASET="GT-Music-Genre"
 5 | METHOD=$1
 6 | 
 7 | # Reject a missing or unsupported method before doing any work.
 8 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
 9 |     echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
10 |     exit 1
11 | fi
12 | 
13 | echo "Running METHOD=$METHOD on DATASET=$DATASET"
14 | 
15 | # Replace the <...> placeholder with the directory that contains Audio-Datasets/.
16 | DATASET_ROOT="<SET_PATH_TO_DATASET_ROOT_DIRECTORY_HERE>/Audio-Datasets/$DATASET"
17 | 
18 | # Abort when the dataset directory is missing; continuing would only launch runs that cannot find their data.
19 | if [ -d "$DATASET_ROOT" ]; then
20 |     echo "Dataset path exists: $DATASET_ROOT"
21 | else
22 |     echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
23 |     exit 1
24 | fi
25 | 
26 | # Value passed to --ctx_dim: 512 for coop/cocoop, 1024 for zeroshot/palm.
27 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
28 |     CTX_DIM=512
29 | else
30 |     CTX_DIM=1024
31 | fi
32 | 
33 | # zeroshot is run with seed 0 only; every other method is run with seeds 0, 1 and 2.
34 | if [ "$METHOD" = "zeroshot" ]; then
35 |     SEEDS=0
36 | else
37 |     SEEDS="0 1 2"
38 | fi
39 | 
40 | # $SEEDS is deliberately unquoted so it splits into one word per seed.
41 | # All other expansions are quoted so a dataset path containing spaces stays a single argument.
42 | for SEED in $SEEDS
43 |     do
44 |         python main.py \
45 |             --model_name "$METHOD" \
46 |             --dataset_root "$DATASET_ROOT" \
47 |             --n_epochs 50 \
48 |             --freq_test_model 10 \
49 |             --ctx_dim "$CTX_DIM" \
50 |             --batch_size 16 \
51 |             --lr 0.05 \
52 |             --seed "$SEED" \
53 |             --exp_name "$DATASET" \
54 |             --num_shots 16 \
55 |             --do_logging
56 |     done


--------------------------------------------------------------------------------
/scripts/ns_instruments.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs one method on the NS-Instruments dataset, once per seed.
 3 | # Usage (from the directory containing main.py): sh scripts/ns_instruments.sh <zeroshot|coop|cocoop|palm>
 4 | DATASET="NS-Instruments"
 5 | METHOD=$1
 6 | 
 7 | # Reject a missing or unsupported method before doing any work.
 8 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
 9 |     echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
10 |     exit 1
11 | fi
12 | 
13 | echo "Running METHOD=$METHOD on DATASET=$DATASET"
14 | 
15 | # Replace the <...> placeholder with the directory that contains Audio-Datasets/.
16 | DATASET_ROOT="<SET_PATH_TO_DATASET_ROOT_DIRECTORY_HERE>/Audio-Datasets/$DATASET"
17 | 
18 | # Abort when the dataset directory is missing; continuing would only launch runs that cannot find their data.
19 | if [ -d "$DATASET_ROOT" ]; then
20 |     echo "Dataset path exists: $DATASET_ROOT"
21 | else
22 |     echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
23 |     exit 1
24 | fi
25 | 
26 | # Value passed to --ctx_dim: 512 for coop/cocoop, 1024 for zeroshot/palm.
27 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
28 |     CTX_DIM=512
29 | else
30 |     CTX_DIM=1024
31 | fi
32 | 
33 | # zeroshot is run with seed 0 only; every other method is run with seeds 0, 1 and 2.
34 | if [ "$METHOD" = "zeroshot" ]; then
35 |     SEEDS=0
36 | else
37 |     SEEDS="0 1 2"
38 | fi
39 | 
40 | # $SEEDS is deliberately unquoted so it splits into one word per seed.
41 | # All other expansions are quoted so a dataset path containing spaces stays a single argument.
42 | for SEED in $SEEDS
43 |     do
44 |         python main.py \
45 |             --model_name "$METHOD" \
46 |             --dataset_root "$DATASET_ROOT" \
47 |             --n_epochs 50 \
48 |             --freq_test_model 10 \
49 |             --ctx_dim "$CTX_DIM" \
50 |             --batch_size 16 \
51 |             --lr 0.05 \
52 |             --seed "$SEED" \
53 |             --exp_name "$DATASET" \
54 |             --num_shots 16 \
55 |             --do_logging
56 |     done


--------------------------------------------------------------------------------
/scripts/esc50.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs one method on the ESC50 dataset, once per fold (1-5) and seed.
 3 | # Usage (from the directory containing main.py): sh scripts/esc50.sh <zeroshot|coop|cocoop|palm>
 4 | DATASET="ESC50"
 5 | METHOD=$1
 6 | 
 7 | # Reject a missing or unsupported method before doing any work.
 8 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
 9 |     echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
10 |     exit 1
11 | fi
12 | 
13 | echo "Running METHOD=$METHOD on DATASET=$DATASET"
14 | 
15 | # Replace the <...> placeholder with the directory that contains Audio-Datasets/.
16 | DATASET_ROOT="<SET_PATH_TO_DATASET_ROOT_DIRECTORY_HERE>/Audio-Datasets/$DATASET"
17 | 
18 | # Abort when the dataset directory is missing; continuing would only launch runs that cannot find their data.
19 | if [ -d "$DATASET_ROOT" ]; then
20 |     echo "Dataset path exists: $DATASET_ROOT"
21 | else
22 |     echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
23 |     exit 1
24 | fi
25 | 
26 | # Value passed to --ctx_dim: 512 for coop/cocoop, 1024 for zeroshot/palm.
27 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
28 |     CTX_DIM=512
29 | else
30 |     CTX_DIM=1024
31 | fi
32 | 
33 | # zeroshot is run with seed 0 only; every other method is run with seeds 0, 1 and 2.
34 | if [ "$METHOD" = "zeroshot" ]; then
35 |     SEEDS=0
36 | else
37 |     SEEDS="0 1 2"
38 | fi
39 | 
40 | # Every fold is run with every seed. Before each run the fold's CSV split is copied
41 | # to train.csv/test.csv in the dataset root (presumably the files main.py reads - TODO confirm).
42 | # $SEEDS is deliberately unquoted so it splits into one word per seed.
43 | # All other expansions are quoted so a dataset path containing spaces stays a single argument.
44 | for FOLD in 1 2 3 4 5
45 |     do
46 |         for SEED in $SEEDS
47 |             do
48 |                 echo "Running Fold-$FOLD with SEED=$SEED"
49 |                 if [ -f "$DATASET_ROOT/train.csv" ]; then rm -f "$DATASET_ROOT/train.csv"; fi
50 |                 if [ -f "$DATASET_ROOT/test.csv" ]; then rm -f "$DATASET_ROOT/test.csv"; fi
51 |                 # Abort if the fold's split cannot be installed; otherwise the run would start with missing CSVs.
52 |                 cp "$DATASET_ROOT/csv_files/train_$FOLD.csv" "$DATASET_ROOT/train.csv" || exit 1
53 |                 cp "$DATASET_ROOT/csv_files/test_$FOLD.csv" "$DATASET_ROOT/test.csv" || exit 1
54 | 
55 |                 python main.py \
56 |                     --model_name "$METHOD" \
57 |                     --dataset_root "$DATASET_ROOT" \
58 |                     --n_epochs 50 \
59 |                     --freq_test_model 10 \
60 |                     --ctx_dim "$CTX_DIM" \
61 |                     --batch_size 16 \
62 |                     --lr 0.05 \
63 |                     --seed "$SEED" \
64 |                     --exp_name "$DATASET-FOLD$FOLD" \
65 |                     --num_shots 16 \
66 |                     --do_logging
67 |             done
68 |     done


--------------------------------------------------------------------------------
/scripts/tut.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs one method on the TUT2017 dataset, once per fold (1-4) and seed.
 3 | # Usage (from the directory containing main.py): sh scripts/tut.sh <zeroshot|coop|cocoop|palm>
 4 | DATASET="TUT2017"
 5 | METHOD=$1
 6 | 
 7 | # Reject a missing or unsupported method before doing any work.
 8 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
 9 |     echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
10 |     exit 1
11 | fi
12 | 
13 | echo "Running METHOD=$METHOD on DATASET=$DATASET"
14 | 
15 | # Replace the <...> placeholder with the directory that contains Audio-Datasets/.
16 | DATASET_ROOT="<SET_PATH_TO_DATASET_ROOT_DIRECTORY_HERE>/Audio-Datasets/$DATASET"
17 | 
18 | # Abort when the dataset directory is missing; continuing would only launch runs that cannot find their data.
19 | if [ -d "$DATASET_ROOT" ]; then
20 |     echo "Dataset path exists: $DATASET_ROOT"
21 | else
22 |     echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
23 |     exit 1
24 | fi
25 | 
26 | # Value passed to --ctx_dim: 512 for coop/cocoop, 1024 for zeroshot/palm.
27 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
28 |     CTX_DIM=512
29 | else
30 |     CTX_DIM=1024
31 | fi
32 | 
33 | # zeroshot is run with seed 0 only; every other method is run with seeds 0, 1 and 2.
34 | if [ "$METHOD" = "zeroshot" ]; then
35 |     SEEDS=0
36 | else
37 |     SEEDS="0 1 2"
38 | fi
39 | 
40 | # Every fold is run with every seed. Before each run the fold's CSV split is copied
41 | # to train.csv/test.csv in the dataset root (presumably the files main.py reads - TODO confirm).
42 | # $SEEDS is deliberately unquoted so it splits into one word per seed.
43 | # All other expansions are quoted so a dataset path containing spaces stays a single argument.
44 | for FOLD in 1 2 3 4
45 |     do
46 |         for SEED in $SEEDS
47 |             do
48 |                 echo "Running Fold-$FOLD with SEED=$SEED"
49 |                 if [ -f "$DATASET_ROOT/train.csv" ]; then rm -f "$DATASET_ROOT/train.csv"; fi
50 |                 if [ -f "$DATASET_ROOT/test.csv" ]; then rm -f "$DATASET_ROOT/test.csv"; fi
51 |                 # Abort if the fold's split cannot be installed; otherwise the run would start with missing CSVs.
52 |                 cp "$DATASET_ROOT/csv_files/train_$FOLD.csv" "$DATASET_ROOT/train.csv" || exit 1
53 |                 cp "$DATASET_ROOT/csv_files/test_$FOLD.csv" "$DATASET_ROOT/test.csv" || exit 1
54 | 
55 |                 python main.py \
56 |                     --model_name "$METHOD" \
57 |                     --dataset_root "$DATASET_ROOT" \
58 |                     --n_epochs 50 \
59 |                     --freq_test_model 10 \
60 |                     --ctx_dim "$CTX_DIM" \
61 |                     --batch_size 16 \
62 |                     --lr 0.05 \
63 |                     --seed "$SEED" \
64 |                     --exp_name "$DATASET-FOLD$FOLD" \
65 |                     --num_shots 16 \
66 |                     --do_logging
67 |             done
68 |     done


--------------------------------------------------------------------------------
/scripts/beijing_opera.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs one method on the Beijing-Opera dataset, once per fold (1-5) and seed.
 3 | # Usage (from the directory containing main.py): sh scripts/beijing_opera.sh <zeroshot|coop|cocoop|palm>
 4 | DATASET="Beijing-Opera"
 5 | METHOD=$1
 6 | 
 7 | # Reject a missing or unsupported method before doing any work.
 8 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
 9 |     echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
10 |     exit 1
11 | fi
12 | 
13 | echo "Running METHOD=$METHOD on DATASET=$DATASET"
14 | 
15 | # Replace the <...> placeholder with the directory that contains Audio-Datasets/.
16 | DATASET_ROOT="<SET_PATH_TO_DATASET_ROOT_DIRECTORY_HERE>/Audio-Datasets/$DATASET"
17 | 
18 | # Abort when the dataset directory is missing; continuing would only launch runs that cannot find their data.
19 | if [ -d "$DATASET_ROOT" ]; then
20 |     echo "Dataset path exists: $DATASET_ROOT"
21 | else
22 |     echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
23 |     exit 1
24 | fi
25 | 
26 | # Value passed to --ctx_dim: 512 for coop/cocoop, 1024 for zeroshot/palm.
27 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
28 |     CTX_DIM=512
29 | else
30 |     CTX_DIM=1024
31 | fi
32 | 
33 | # zeroshot is run with seed 0 only; every other method is run with seeds 0, 1 and 2.
34 | if [ "$METHOD" = "zeroshot" ]; then
35 |     SEEDS=0
36 | else
37 |     SEEDS="0 1 2"
38 | fi
39 | 
40 | # Every fold is run with every seed. Before each run the fold's CSV split is copied
41 | # to train.csv/test.csv in the dataset root (presumably the files main.py reads - TODO confirm).
42 | # $SEEDS is deliberately unquoted so it splits into one word per seed.
43 | # All other expansions are quoted so a dataset path containing spaces stays a single argument.
44 | for FOLD in 1 2 3 4 5
45 |     do
46 |         for SEED in $SEEDS
47 |             do
48 |                 echo "Running Fold-$FOLD with SEED=$SEED"
49 |                 if [ -f "$DATASET_ROOT/train.csv" ]; then rm -f "$DATASET_ROOT/train.csv"; fi
50 |                 if [ -f "$DATASET_ROOT/test.csv" ]; then rm -f "$DATASET_ROOT/test.csv"; fi
51 |                 # Abort if the fold's split cannot be installed; otherwise the run would start with missing CSVs.
52 |                 cp "$DATASET_ROOT/csv_files/train_$FOLD.csv" "$DATASET_ROOT/train.csv" || exit 1
53 |                 cp "$DATASET_ROOT/csv_files/test_$FOLD.csv" "$DATASET_ROOT/test.csv" || exit 1
54 | 
55 |                 python main.py \
56 |                     --model_name "$METHOD" \
57 |                     --dataset_root "$DATASET_ROOT" \
58 |                     --n_epochs 50 \
59 |                     --freq_test_model 10 \
60 |                     --ctx_dim "$CTX_DIM" \
61 |                     --batch_size 16 \
62 |                     --lr 0.05 \
63 |                     --seed "$SEED" \
64 |                     --exp_name "$DATASET-FOLD$FOLD" \
65 |                     --num_shots 16 \
66 |                     --do_logging
67 |             done
68 |     done


--------------------------------------------------------------------------------
/scripts/esc50_actions.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs one method on the ESC50-Actions dataset, once per fold (1-5) and seed.
 3 | # Usage (from the directory containing main.py): sh scripts/esc50_actions.sh <zeroshot|coop|cocoop|palm>
 4 | DATASET="ESC50-Actions"
 5 | METHOD=$1
 6 | 
 7 | # Reject a missing or unsupported method before doing any work.
 8 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
 9 |     echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
10 |     exit 1
11 | fi
12 | 
13 | echo "Running METHOD=$METHOD on DATASET=$DATASET"
14 | 
15 | # Replace the <...> placeholder with the directory that contains Audio-Datasets/.
16 | DATASET_ROOT="<SET_PATH_TO_DATASET_ROOT_DIRECTORY_HERE>/Audio-Datasets/$DATASET"
17 | 
18 | # Abort when the dataset directory is missing; continuing would only launch runs that cannot find their data.
19 | if [ -d "$DATASET_ROOT" ]; then
20 |     echo "Dataset path exists: $DATASET_ROOT"
21 | else
22 |     echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
23 |     exit 1
24 | fi
25 | 
26 | # Value passed to --ctx_dim: 512 for coop/cocoop, 1024 for zeroshot/palm.
27 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
28 |     CTX_DIM=512
29 | else
30 |     CTX_DIM=1024
31 | fi
32 | 
33 | # zeroshot is run with seed 0 only; every other method is run with seeds 0, 1 and 2.
34 | if [ "$METHOD" = "zeroshot" ]; then
35 |     SEEDS=0
36 | else
37 |     SEEDS="0 1 2"
38 | fi
39 | 
40 | # Every fold is run with every seed. Before each run the fold's CSV split is copied
41 | # to train.csv/test.csv in the dataset root (presumably the files main.py reads - TODO confirm).
42 | # $SEEDS is deliberately unquoted so it splits into one word per seed.
43 | # All other expansions are quoted so a dataset path containing spaces stays a single argument.
44 | for FOLD in 1 2 3 4 5
45 |     do
46 |         for SEED in $SEEDS
47 |             do
48 |                 echo "Running Fold-$FOLD with SEED=$SEED"
49 |                 if [ -f "$DATASET_ROOT/train.csv" ]; then rm -f "$DATASET_ROOT/train.csv"; fi
50 |                 if [ -f "$DATASET_ROOT/test.csv" ]; then rm -f "$DATASET_ROOT/test.csv"; fi
51 |                 # Abort if the fold's split cannot be installed; otherwise the run would start with missing CSVs.
52 |                 cp "$DATASET_ROOT/csv_files/train_$FOLD.csv" "$DATASET_ROOT/train.csv" || exit 1
53 |                 cp "$DATASET_ROOT/csv_files/test_$FOLD.csv" "$DATASET_ROOT/test.csv" || exit 1
54 | 
55 |                 python main.py \
56 |                     --model_name "$METHOD" \
57 |                     --dataset_root "$DATASET_ROOT" \
58 |                     --n_epochs 50 \
59 |                     --freq_test_model 10 \
60 |                     --ctx_dim "$CTX_DIM" \
61 |                     --batch_size 16 \
62 |                     --lr 0.05 \
63 |                     --seed "$SEED" \
64 |                     --exp_name "$DATASET-FOLD$FOLD" \
65 |                     --num_shots 16 \
66 |                     --do_logging
67 |             done
68 |     done


--------------------------------------------------------------------------------
/scripts/urban_sound.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs one method on the UrbanSound8K dataset, once per fold (1-10) and seed.
 3 | # Usage (from the directory containing main.py): sh scripts/urban_sound.sh <zeroshot|coop|cocoop|palm>
 4 | DATASET="UrbanSound8K"
 5 | METHOD=$1
 6 | 
 7 | # Reject a missing or unsupported method before doing any work.
 8 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
 9 |     echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
10 |     exit 1
11 | fi
12 | 
13 | echo "Running METHOD=$METHOD on DATASET=$DATASET"
14 | 
15 | # Replace the <...> placeholder with the directory that contains Audio-Datasets/.
16 | DATASET_ROOT="<SET_PATH_TO_DATASET_ROOT_DIRECTORY_HERE>/Audio-Datasets/$DATASET"
17 | 
18 | # Abort when the dataset directory is missing; continuing would only launch runs that cannot find their data.
19 | if [ -d "$DATASET_ROOT" ]; then
20 |     echo "Dataset path exists: $DATASET_ROOT"
21 | else
22 |     echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
23 |     exit 1
24 | fi
25 | 
26 | # Value passed to --ctx_dim: 512 for coop/cocoop, 1024 for zeroshot/palm.
27 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
28 |     CTX_DIM=512
29 | else
30 |     CTX_DIM=1024
31 | fi
32 | 
33 | # zeroshot is run with seed 0 only; every other method is run with seeds 0, 1 and 2.
34 | if [ "$METHOD" = "zeroshot" ]; then
35 |     SEEDS=0
36 | else
37 |     SEEDS="0 1 2"
38 | fi
39 | 
40 | # Every fold is run with every seed. Before each run the fold's CSV split is copied
41 | # to train.csv/test.csv in the dataset root (presumably the files main.py reads - TODO confirm).
42 | # $SEEDS is deliberately unquoted so it splits into one word per seed.
43 | # All other expansions are quoted so a dataset path containing spaces stays a single argument.
44 | for FOLD in 1 2 3 4 5 6 7 8 9 10
45 |     do
46 |         for SEED in $SEEDS
47 |             do
48 |                 echo "Running Fold-$FOLD with SEED=$SEED"
49 |                 if [ -f "$DATASET_ROOT/train.csv" ]; then rm -f "$DATASET_ROOT/train.csv"; fi
50 |                 if [ -f "$DATASET_ROOT/test.csv" ]; then rm -f "$DATASET_ROOT/test.csv"; fi
51 |                 # Abort if the fold's split cannot be installed; otherwise the run would start with missing CSVs.
52 |                 cp "$DATASET_ROOT/csv_files/train_$FOLD.csv" "$DATASET_ROOT/train.csv" || exit 1
53 |                 cp "$DATASET_ROOT/csv_files/test_$FOLD.csv" "$DATASET_ROOT/test.csv" || exit 1
54 | 
55 |                 python main.py \
56 |                     --model_name "$METHOD" \
57 |                     --dataset_root "$DATASET_ROOT" \
58 |                     --n_epochs 50 \
59 |                     --freq_test_model 10 \
60 |                     --ctx_dim "$CTX_DIM" \
61 |                     --batch_size 16 \
62 |                     --lr 0.05 \
63 |                     --seed "$SEED" \
64 |                     --exp_name "$DATASET-FOLD$FOLD" \
65 |                     --num_shots 16 \
66 |                     --do_logging
67 |             done
68 |     done


--------------------------------------------------------------------------------
/logs/zeroshot/SESA-SEED0.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ##############################################
 4 | PALM: Prompt Learning in Audio Language Models
 5 | ##############################################
 6 | 
 7 | 
 8 | Time & Date = 09:10 PM , 10_Jun_2024  GST
 9 | 
10 | 
11 | 
12 | Model:    ZEROSHOT
13 | Dataset:  SESA
14 | Seed:     0
15 | 
16 | 
17 | Creating a 16-shot dataset ...
18 | 
19 | 
20 | ################## Dataset Information ##################
21 | FewShot Dataset
22 | 
23 | Root: REDACTED/Audio-Datasets/SESA
24 | 
25 | Number of Classes: 4
26 | 
27 | Number of Shots: 16
28 | 
29 | Total Number of Samples: 64
30 | 
31 | Classnames: ['casual', 'explosion', 'gunshot', 'siren']
32 | 
33 | Label to Classname: {0: 'casual', 1: 'explosion', 2: 'gunshot', 3: 'siren'}
34 | 
35 | Classname to Label: {'casual': 0, 'explosion': 1, 'gunshot': 2, 'siren': 3}
36 | ########################################################
37 | 
38 | 
39 | 
40 | 
41 | ################## Dataset Information ##################
42 | FewShot Dataset
43 | 
44 | Root: REDACTED/Audio-Datasets/SESA
45 | 
46 | Number of Classes: 4
47 | 
48 | Number of Shots: -1
49 | 
50 | Total Number of Samples: 105
51 | 
52 | Classnames: ['casual', 'explosion', 'gunshot', 'siren']
53 | 
54 | Label to Classname: {0: 'casual', 1: 'explosion', 2: 'gunshot', 3: 'siren'}
55 | 
56 | Classname to Label: {'casual': 0, 'explosion': 1, 'gunshot': 2, 'siren': 3}
57 | ########################################################
58 | 
59 | 
60 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
61 |   warnings.warn(
62 | 
63 | 
64 | ################## Zero-Shot PENGI Information ##################
65 | Prompt Prefix:  The is a recording of 
66 | Prompts:  ['The is a recording of  casual.', 'The is a recording of  explosion.', 'The is a recording of  gunshot.', 'The is a recording of  siren.']
67 | ###################################################################
68 | 
69 | 
70 | args:  Namespace(batch_size=16, classnames=['casual', 'explosion', 'gunshot', 'siren'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/SESA', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='SESA', freq_test_model=10, json_file_path='logs/zeroshot/SESA.json', log_dir='logs/zeroshot', lr=0.05, model_name='zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7fad554ea160>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, save_model_path='REDACTED/Audio-Datasets/SESA', seed=0, spec_aug=False, start_epoch=0)
71 | 
72 | 
73 | Evaluating the model ...
74 | 
75 |   0%|          | 0/1 [00:00<?, ?it/s]
76 | 100%|##########| 1/1 [00:06<00:00,  6.19s/it]
77 | 100%|##########| 1/1 [00:06<00:00,  6.38s/it]
78 | 
79 | 
80 | Time & Date = 09:10 PM , 10_Jun_2024  GST
81 | 
82 | Total Time => 0 Hours : 0 Minutes : 6 Seconds
83 | 
84 | 
85 | Accuracy             = 0.7238
86 | F1-Score             = 0.6827
87 | Precision            = 0.6941
88 | Recall               = 0.7508
89 | Average Loss         = 1.3722
90 | 
91 | 
92 | Saving Results ...
93 | Results Saved
94 | 
95 | 
96 | 


--------------------------------------------------------------------------------
/palm/zeroshot.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from .encoders import AudioEncoder, TextEncoder
 3 | 
 4 | 
 5 | 
 6 | class ZeroShotPENGI(torch.nn.Module):
 7 |     def __init__(self, args, pengi):
 8 |         super().__init__()
 9 | 
10 |         self.args = args
11 |         pengi_args  = pengi.args
12 |         self.pengi_args = pengi_args
13 |         self.process_text = pengi.preprocess_text
14 | 
15 |         pengi_args.specaug = args.spec_aug
16 | 
17 |         self.audio_encoder = AudioEncoder(
18 |                     pengi_args.audioenc_name, pengi_args.out_emb, pengi_args.d_proj,
19 |                     pengi_args.sampling_rate, pengi_args.window_size, pengi_args.hop_size, pengi_args.mel_bins, pengi_args.fmin, pengi_args.fmax, pengi_args.classes_num, 
20 |                     pengi_args.specaug, pengi_args.mixup, pengi_args.use_pretrained_audioencoder, pengi_args.freeze_audio_encoder_weights,
21 |                     pengi_args.use_precomputed_melspec, pengi_args.pretrained_audioencoder_path)
22 | 
23 |         self.text_encoder = TextEncoder(
24 |                     pengi_args.d_proj, 
25 |                     pengi_args.text_model, pengi_args.transformer_embed_dim,
26 |                     pengi_args.freeze_text_encoder_weights)
27 | 
28 | 
29 |         # load the weights of the pengi pre-trained audio and text encoders
30 |         print("ZERO SHOT: loading the weights of the pengi pre-trained audio and text encoders ...")
31 |         self.audio_encoder.load_state_dict(pengi.model.audio_encoder.state_dict())
32 |         self.text_encoder.load_state_dict(pengi.model.caption_encoder.state_dict())
33 | 
34 | 
35 |         self.audio_encoder.eval()
36 |         self.text_encoder.eval()
37 |    
38 |         self.device = args.device
39 | 
40 |         prompt_prefix = args.prompt_prefix
41 |         self.prompts = [f"{prompt_prefix} {class_name}." for class_name in args.classnames]
42 | 
43 |         print("\n\n################## Zero-Shot PENGI Information ##################")
44 |         print("Prompt Prefix: ", prompt_prefix)
45 |         print("Prompts: ", self.prompts)
46 |         print("###################################################################\n\n")
47 |         
48 |     def forward(self, audio):
49 | 
50 |         audio_features = self.audio_encoder(audio)[0] # audio_features shape [n_audio_files, 1024]
51 |         audio_features = audio_features / audio_features.norm(dim=-1, keepdim=True)
52 |  
53 | 
54 |         tokenized_prompts = self.process_text(self.prompts, enc_tok=True, add_text=False)
55 |         
56 |         prompts_tokens = tokenized_prompts['input_ids'].to(self.device)
57 |         # breakpoint()
58 |         prompts_token_embeddings = self.text_encoder.base.embeddings.token_embedding(prompts_tokens).to(self.device)   # [batch_size, seq_length, embed_dim]
59 |         prompts_attention_mask = tokenized_prompts['attention_mask'].to(self.device)
60 |         
61 |         text = {"input_ids": prompts_tokens, "inputs_embeds": prompts_token_embeddings, "attention_mask": prompts_attention_mask}
62 |         text_features = self.text_encoder(text) # text_features shape [n_text_prompts, 1024]
63 |         text_features = text_features / text_features.norm(dim=-1, keepdim=True)
64 | 
65 |         logit_scale = 100.0
66 |         logits = logit_scale * audio_features @ text_features.t()  # logits shape [n_audio_files, n_text_prompts]
67 |         # breakpoint()
68 | 
69 |         return logits
70 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import random
 3 | import numpy as np
 4 | import datetime
 5 | import pytz
 6 | from tqdm import tqdm
 7 | from pprint import pprint
 8 | 
 9 | import torch
10 | import torch.nn as nn
11 | 
12 | import palm
13 | from pengi import pengi
14 | 
15 | 
16 | from utils import trainer
17 | from utils.utils import print_total_time, get_args, get_dataloaders, get_model, setup_logging, get_scores, print_scores, save_scores, load_model
18 | 
19 | # to solve  the issue of : the current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks
20 | os.environ["TOKENIZERS_PARALLELISM"] = "false"
21 | 
22 | 
def main(args):
    """Run one experiment: build the data and model, then evaluate or train.

    Args:
        args: parsed run configuration. Reads ``model_name``, ``dataset_root``,
            ``seed``, ``eval_only``, ``do_logging``, ``json_file_path``, ``lr``
            and ``n_epochs``; sets ``device``, ``process_audio_fn`` and
            ``classnames`` on it for the helpers called below.
    """
    # normpath drops a trailing slash, so the dataset name is never empty
    # (a plain split('/')[-1] returned '' for "path/to/DATASET/").
    dataset_name = os.path.basename(os.path.normpath(args.dataset_root))

    print(f"\n\n{'Model:':<10}{args.model_name.upper()}")
    print(f"{'Dataset:':<10}{dataset_name}")
    print(f"{'Seed:':<10}{args.seed}\n\n")


    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device

    args.process_audio_fn = pengi.preprocess_audio

    # to ensure reproducibility
    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)


    train_dataloader, test_dataloader = get_dataloaders(args)
    args.classnames = train_dataloader.dataset.classnames
    # Both splits must map class names to labels the same way.
    assert train_dataloader.dataset.classnames == test_dataloader.dataset.classnames, "Classnames in train and test datasets are different."

    model = get_model(args, pengi, palm)
    model.to(device)

    criterion = nn.CrossEntropyLoss()

    print("\nArguments:\n")
    for arg in vars(args): print(f"{arg:<25}: {getattr(args, arg)}")
    print("\n\n")


    if args.eval_only:
        # The zero-shot model has nothing to load from a checkpoint.
        if args.model_name != "zeroshot": load_model(args, model)
        test_loss, actual_labels, predicted_labels = trainer.run_evaluation(model, test_dataloader, criterion, device)
        accuracy, f1_score, precision, recall =  get_scores(actual_labels, predicted_labels, args.classnames)
        print("\n\n-------------------------------\nTest Evaluation\n-------------------------------\n")
        print_scores(accuracy, f1_score, precision, recall, test_loss)
        if args.do_logging:
            print("Saving Results ...") 
            # -1 is passed in the epoch slot because no training epoch was run.
            save_scores(args.seed, -1, accuracy, f1_score, precision, recall, test_loss, args.json_file_path)
            print("Results Saved\n\n")
    else:
        # Only the prompt learner's parameters are handed to the optimizer.
        #optimizer = torch.optim.Adam(model.prompt_learner.parameters(), lr=args.lr)
        optimizer = torch.optim.SGD(model.prompt_learner.parameters(), lr=args.lr, momentum=0.9)
        trainer.run_training(model, train_dataloader, test_dataloader, optimizer, criterion, device, epochs=args.n_epochs, args=args)
72 | 
73 | 
74 | 
if __name__ == "__main__":

    # Parse the options first; setup_logging is configured from them.
    args = get_args()
    # The return value is not used below; presumably it is kept bound so the
    # log target stays alive for the whole run -- TODO confirm.
    log_file = setup_logging(args)

    print("\n\n##############################################")
    print("PALM: Prompt Learning in Audio Language Models")
    print("##############################################\n\n")

    # Stamp the run with the current wall-clock time in the Asia/Dubai zone.
    dubai_tz = pytz.timezone('Asia/Dubai')
    date_now = datetime.datetime.now(dubai_tz)
    clock_part = date_now.strftime("%I:%M %p")
    day_part = date_now.strftime("%d_%b_%Y")
    print(f'Time & Date = {clock_part} , {day_part}  GST\n')

    main(args)
87 | 
88 | 


--------------------------------------------------------------------------------
/logs/process_results.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import numpy as np
  4 | 
  5 | 
  6 | 
  7 | # Function to load JSON data from a file
  8 | def load_json(filepath):
  9 |     with open(filepath, 'r') as f:
 10 |         return json.load(f)
 11 | 
 12 | 
 13 | def check_seed_existence(results):
 14 |     seeds_exist = []
 15 |     for seed in SEEDS:
 16 |         if f'seed_{seed}' in results.keys(): seeds_exist.append(seed)
 17 |     return seeds_exist
 18 | 
 19 | 
 20 | # Function to get results for all seeds of a dataset and method   
 21 | def get_dataset_results(dataset):
 22 |     json_path = f"{os.path.join(results_folder, dataset)}.json"
 23 |     
 24 |     if os.path.exists(json_path):
 25 |         results = load_json(json_path)
 26 |         seeds_exist = check_seed_existence(results)
 27 |         if len(seeds_exist) != len(SEEDS): raise ValueError(f"Seeds {set(SEEDS)-set(seeds_exist)} not found in {json_path} file. Get results for all seeds first in '{json_path}'.")
 28 |     else:
 29 |         raise ValueError(f"File {json_path} does not exist. Get results for Dataset='{dataset}'.") 
 30 |     
 31 |     return results
 32 | 
 33 | 
 34 | 
 35 | def get_results():
 36 |     results = {}
 37 |     for dataset in DATASETS:
 38 |         results[dataset] = get_dataset_results(dataset)
 39 |     return results
 40 |   
 41 | if __name__ == "__main__":
 42 |     
 43 |     # Datasets 
 44 |     DATASETS = [
 45 |                 'Beijing-Opera',
 46 |                 'CREMA-D',
 47 |                 'ESC50-Actions',
 48 |                 'ESC50',
 49 |                 'GT-Music-Genre',
 50 |                 'NS-Instruments',
 51 |                 'RAVDESS',
 52 |                 'SESA',
 53 |                 'TUT2017',
 54 |                 'UrbanSound8K',
 55 |                 'VocalSound',
 56 |             ]
 57 | 
 58 | 
 59 | 
 60 |     methods = ['zeroshot', 'coop', 'cocoop', 'palm']
 61 | 
 62 |     for method in methods:
 63 |         # Folder containing the JSON files
 64 |         results_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), method)
 65 | 
 66 |         if method == 'zeroshot':
 67 |             SEEDS = [0]
 68 |         else:
 69 |             SEEDS = [0,1,2]
 70 | 
 71 |         results = get_results()
 72 |         
 73 | 
 74 |         accuracy_dict = {}
 75 |         f1_score_dict = {}
 76 | 
 77 |         for dataset in DATASETS:
 78 | 
 79 |             accuracy_sub_list = []
 80 |             f1_score_sub_list = []
 81 | 
 82 |             for seed in SEEDS:
 83 |                 accuracy_sub_list.append(results[dataset][f'seed_{seed}']['accuracy'])
 84 |                 f1_score_sub_list.append(results[dataset][f'seed_{seed}']['f1_score'])
 85 |             
 86 |             if len(accuracy_sub_list) > 1:
 87 |                 accuracy_sub_list.append(np.mean(accuracy_sub_list))
 88 |                 f1_score_sub_list.append(np.mean(f1_score_sub_list))
 89 | 
 90 |             accuracy_dict[dataset] = accuracy_sub_list
 91 |             f1_score_dict[dataset] = f1_score_sub_list
 92 | 
 93 |         
 94 |         with open(os.path.join(results_folder,'accuracy.json'), 'w') as f:
 95 |             json.dump(accuracy_dict, f, indent=2)
 96 |         print(f"Accuracy results saved in {os.path.join(results_folder,'accuracy.json')} file.")
 97 | 
 98 | 
 99 |         with open(os.path.join(results_folder,'f1_score.json'), 'w') as f:
100 |             json.dump(f1_score_dict, f, indent=2)
101 |         print(f"F1-score results saved in {os.path.join(results_folder,'f1_score.json')}.")
102 | 
103 | 
104 |     print("\n\nResults saved successfully.\n\n")


--------------------------------------------------------------------------------
/utils/trainer.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | import numpy as np
 4 | from tqdm import tqdm
 5 | 
 6 | from .utils import get_scores, print_scores, save_scores, timeit, save_model, get_save_model_path
 7 | 
 8 | 
 9 | def run_epoch(model, dataloader, optimizer, criterion, device, args=None):
10 |     model.train()
11 | 
12 |     losses = []
13 |     actual_labels = []
14 |     predicted_labels = []
15 | 
16 |     for i, (audio, label) in enumerate(dataloader):
17 | 
18 |         audio = audio.to(device).squeeze(1)
19 |         label = label.to(device)
20 |  
21 | 
22 |         logits = model(audio)
23 |         loss = criterion(logits, label)
24 |         
25 |         optimizer.zero_grad()
26 |         loss.backward()
27 |         optimizer.step()
28 |         
29 |         losses.append(loss.item())
30 | 
31 |         actual_labels.extend(label.cpu().numpy())
32 |         predicted_labels.extend(logits.argmax(axis=1).cpu().numpy())
33 | 
34 |     avg_loss = sum(losses) / len(losses)
35 | 
36 |     return avg_loss, actual_labels, predicted_labels
37 | 
38 | 
@timeit
def run_evaluation(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` with gradients disabled.

    Args:
        model: module called on each audio batch to produce logits.
        dataloader: sized iterable yielding ``(audio, label)`` batches.
        criterion: loss applied to ``(logits, label)``.
        device: device the batches are moved to.

    Returns:
        Tuple ``(avg_loss, actual_labels, predicted_labels)``: the mean of the
        per-batch losses and the flat lists of true and argmax-predicted labels.
    """
    model.eval()

    batch_losses, actual_labels, predicted_labels = [], [], []

    print("\n\nEvaluating the model ...")
    with torch.no_grad():
        for step, (audio, label) in enumerate(dataloader, start=1):
            # Progress is reported by a plain print for every batch.
            print(f"Batch {step}/{len(dataloader)}")

            audio = audio.to(device).squeeze(1)
            label = label.to(device)

            logits = model(audio)
            batch_losses.append(criterion(logits, label).item())

            actual_labels.extend(label.cpu().numpy())
            predicted_labels.extend(logits.argmax(dim=1).cpu().numpy())

    avg_loss = sum(batch_losses) / len(batch_losses)
    return avg_loss, actual_labels, predicted_labels
67 | 
68 | 
@timeit
def run_training(model, train_dataloader, test_dataloader, optimizer, criterion, device, epochs=50, args=None):
    """Train ``model`` for ``epochs`` epochs with periodic evaluation.

    Train scores are printed every 5 epochs. The test split is evaluated every
    ``args.freq_test_model`` epochs and always after the last epoch, so the
    final scores are saved even when ``epochs`` is not a multiple of
    ``args.freq_test_model``.

    Args:
        model: model to train.
        train_dataloader: batches used by ``run_epoch``.
        test_dataloader: batches used by ``run_evaluation``.
        optimizer: optimizer stepped by ``run_epoch``.
        criterion: loss function.
        device: device the batches are moved to.
        epochs: number of training epochs.
        args: run configuration; reads ``classnames``, ``freq_test_model``,
            ``do_logging``, ``seed``, ``json_file_path`` and ``save_model``.
            Required in practice despite the ``None`` default.
    """
    for epoch in tqdm(range(epochs), total=epochs):

        train_loss, train_actual, train_predicted = run_epoch(model, train_dataloader, optimizer, criterion, device, args=args)

        if (epoch+1)%5 == 0:
            accuracy, f1_score, precision, recall =  get_scores(train_actual, train_predicted, args.classnames)
            print(f"\n\n-------------------------------\nTrain Evaluation (Epoch {epoch + 1}/{epochs})\n-------------------------------\n")
            print_scores(accuracy, f1_score, precision, recall, train_loss) 

        is_last_epoch = epoch == epochs-1

        # Evaluate on schedule, and also after the last epoch: the final
        # results used to be skipped silently when epochs % freq_test_model != 0.
        if (epoch+1)%args.freq_test_model == 0 or is_last_epoch:
            test_loss, test_actual, test_predicted = run_evaluation(model, test_dataloader, criterion, device)
            accuracy, f1_score, precision, recall =  get_scores(test_actual, test_predicted, args.classnames)
            print("\n\n-------------------------------\nTest Evaluation\n-------------------------------\n")
            print_scores(accuracy, f1_score, precision, recall, test_loss)

            if is_last_epoch and args.do_logging:
                print("\n\nFinal Evaluation")
                print("Saving Results ...")
                save_scores(args.seed, epoch, accuracy, f1_score, precision, recall, test_loss, args.json_file_path)
                print("Results Saved\n\n")


    if args.save_model:
        save_model_path = get_save_model_path(args)
        save_model(args, model, save_model_path)
        print(f"Model saved to {save_model_path}")
99 |         


--------------------------------------------------------------------------------
/utils/dataset.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.utils.data import Dataset
 3 | import pandas as pd
 4 | import os
 5 | 
 6 | 
 7 | 
 8 | class FewShotDataset(Dataset):
 9 |     def __init__(self, root, split=None, num_shots=-1, repeat=False, process_audio_fn=None, resample=True): 
10 |         """
11 |         Args:
12 |             root (str): path to the dataset.
13 |             num_shots (int): number of shots per class.
14 |             repeat (bool): repeat samples if needed (default: False).
15 |             process_audio_fn (function): function to process audio samples.
16 |             resample (bool): resample audio samples (default: True).
17 |             
18 |         """
19 | 
20 |         assert split is not None, "'split' cannot be None. Choose from ['train', 'test']"
21 |         
22 |         self.root = root
23 |         self.split = split
24 |         self.num_shots = num_shots
25 |         self.repeat = repeat
26 |         self.resample = resample
27 | 
28 |         df = pd.read_csv(os.path.join(root, f"{split}.csv"))
29 |         
30 |         self.classnames = df['classname'].unique().tolist()
31 |         self.classnames.sort()
32 |         self.label2classname = {i: classname for i, classname in enumerate(self.classnames)}
33 |         self.classname2label = {classname: i for i, classname in enumerate(self.classnames)}
34 |         
35 |         self.data = self.generate_fewshot_dataset(df, num_shots=num_shots, repeat=repeat)
36 | 
37 |         self.process_audio_fn = process_audio_fn
38 | 
39 |         print("\n\n################## Dataset Information ##################")
40 |         if num_shots>0: print("FewShot Dataset")
41 |         print(f"{'Root':<25} : {root}")
42 |         print(f"{'Split':<25} : {split}")
43 |         print(f"{'Number of Classes':<25} : {len(self.classnames)}")
44 |         print(f"{'Number of Shots':<25} : {num_shots}")
45 |         print(f"{'Total Number of Samples':<25} : {len(self.data)}")
46 |         print(f"{'Classnames':<25} : {self.classnames}")
47 |         print(f"{'Label to Classname':<25} : {self.label2classname}")
48 |         print(f"{'Classname to Label':<25} : {self.classname2label}")
49 |         print("########################################################\n\n")
50 | 
51 |     def generate_fewshot_dataset(self, df, num_shots=-1, repeat=False):
52 |         """
53 |         Generate a few-shot dataset.
54 |         Args:
55 |             df (pd.DataFrame): dataframe containing the dataset.
56 |             num_shots (int): number of shots per class.
57 |             repeat (bool): repeat samples if needed.
58 |         """
59 | 
60 |         if num_shots == -1:
61 |             return df
62 | 
63 |         print(f"Creating a {num_shots}-shot dataset ...")
64 |         df_subset = pd.DataFrame(columns=df.columns)
65 | 
66 |         for classname in self.classnames:
67 | 
68 |             df_class = df[df['classname'] == classname]
69 | 
70 |             if len(df_class) >= num_shots:
71 |                 df_subset = pd.concat([df_subset, df_class.sample(num_shots)])
72 |             else:
73 |                 if repeat:
74 |                     df_subset = pd.concat([df_subset, df_class.sample(num_shots, replace=True)])
75 |                 else:
76 |                     df_subset = pd.concat([df_subset,df_class])
77 | 
78 | 
79 |         df_subset = df_subset.reset_index(drop=True)
80 | 
81 |         return df_subset
82 | 
83 | 
84 |     def __len__(self):
85 |         return len(self.data)
86 |     
87 | 
88 |     def __getitem__(self, idx):
89 |         row = self.data.iloc[idx]
90 |         audio_path = os.path.join(self.root, row['path'])
91 |         audio = self.process_audio_fn([audio_path], self.resample) # [1,n_samples]
92 |         label = self.classname2label[row['classname']]
93 |         # return audio, label, audio_path, row['classname']
94 |         return audio, label
95 |     
96 | 
97 |     
98 | 


--------------------------------------------------------------------------------
/logs/print_results.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import numpy as np
  4 | from tabulate import tabulate
  5 | from collections import defaultdict
  6 | 
  7 | 
  8 | # Datasets and number of folds
  9 | DATASETS = [
 10 |             'Beijing-Opera',
 11 |             'CREMA-D',
 12 |             'ESC50-Actions',
 13 |             'ESC50',
 14 |             'GT-Music-Genre',
 15 |             'NS-Instruments',
 16 |             'RAVDESS',
 17 |             'SESA',
 18 |             'TUT2017',
 19 |             'UrbanSound8K',
 20 |             'VocalSound',
 21 |         ]
 22 | 
 23 | 
 24 | # methods = ['coop']
 25 | methods = ['zeroshot', 'coop', 'cocoop', 'palm']
 26 | 
 27 | accuracy_dict_all = defaultdict(list)
 28 | f1_score_dict_all = defaultdict(list)
 29 | 
 30 | accuracy_all = []
 31 | f1_score_all = []
 32 | 
 33 | for method in methods:    
 34 |     # Folder containing the JSON files
 35 |     results_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), method)
 36 | 
 37 | 
 38 |     accuracy_dict = json.load(open(os.path.join(results_folder, 'accuracy.json')))
 39 |     f1_score_dict = json.load(open(os.path.join(results_folder, 'f1_score.json')))
 40 | 
 41 | 
 42 |     for dataset in DATASETS:
 43 |         accuracy_dict_all[dataset].extend(accuracy_dict[dataset])
 44 |         f1_score_dict_all[dataset].extend(f1_score_dict[dataset])
 45 | 
 46 | 
 47 | 
 48 | 
 49 | # average accuracy and F1-score across all datasets
 50 | for dataset in DATASETS:
 51 |     accuracy_all.append([accuracy for accuracy in accuracy_dict_all[dataset]])
 52 |     f1_score_all.append([f1_score for f1_score in f1_score_dict_all[dataset]])
 53 | 
 54 | accuracy_all = np.array(accuracy_all)
 55 | f1_score_all = np.array(f1_score_all)
 56 | 
 57 | avg_accuracy_all = accuracy_all.mean(axis=0)
 58 | avg_f1_score_all = f1_score_all.mean(axis=0)
 59 | 
 60 | 
 61 | 
 62 | # print latex table
 63 | string_acc = ''
 64 | string_f1 = ''
 65 | for dataset in DATASETS:
 66 |     string_acc = string_acc +  f'{dataset} & ' + ' & '.join([f'{accuracy:0.4f}' for accuracy in accuracy_dict_all[dataset]]) + ' \\\\\n'
 67 |     string_f1 = string_f1 + f'{dataset} & ' + ' & '.join([f'{f1_score:0.4f}' for f1_score in f1_score_dict_all[dataset]]) + ' \\\\\n'
 68 | 
 69 | 
 70 | string_acc = string_acc +  f'\midrule\nAVERAGE & ' + ' & '.join([f'{accuracy:0.4f}' for accuracy in avg_accuracy_all]) + ' \\\\\n'
 71 | string_f1 = string_f1 + f'\midrule\nAVERAGE & ' + ' & '.join([f'{f1_score:0.4f}' for f1_score in avg_f1_score_all]) + ' \\\\\n'
 72 | 
 73 | 
 74 | top_row = f"DATASETS ↓ & ZERO SHOT & "
 75 | for method in methods[1:]:
 76 |     for seed in range(3): top_row = top_row + f"{method.upper()}-SEED{seed} & "
 77 |     top_row = top_row + f"{method.upper()}-AVG & "
 78 | top_row = top_row[:-2] + ' \\\\'
 79 | 
 80 | print("\n\n########## ACCURACY (LaTeX Table) ##########")
 81 | results_acc = top_row+"\n"+string_acc
 82 | print(results_acc)
 83 | 
 84 | print('\n\n')
 85 | print("\n\n########## F1-SCORE (LaTeX Table) ##########")
 86 | results_f1 = top_row+"\n"+string_f1
 87 | print(results_f1)
 88 | 
 89 | 
 90 | 
 91 | table_acc = []
 92 | for i, row in enumerate(results_acc.split("\n")):
 93 |     row_list = row.split("&")
 94 |     col_list = []
 95 |     for j, col in enumerate(row_list):
 96 |         if col.endswith("\\\\"): col = col[:-3]
 97 |         col = col.strip()
 98 |         col_list.append(col)
 99 |     if '\\midrule' in col_list or '' in col_list: continue
100 |     table_acc.append(col_list)
101 | print("\n\nAccuracy")
102 | print(tabulate(table_acc, tablefmt="simple"))
103 | 
104 | print("\n\n")
105 | 
106 | table_f1 = []
107 | for i, row in enumerate(results_f1.split("\n")):
108 |     row_list = row.split("&")
109 |     col_list = []
110 |     for j, col in enumerate(row_list):
111 |         if col.endswith("\\\\"): col = col[:-3]
112 |         col = col.strip()
113 |         col_list.append(col)
114 |     if '\\midrule' in col_list or '' in col_list: continue
115 |     table_f1.append(col_list)
116 | print("\n\nF1-Score")
117 | print(tabulate(table_f1, tablefmt="simple"))
118 | 
119 | 
120 | 
121 | 


--------------------------------------------------------------------------------
/palm/palm.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | from palm.encoders import AudioEncoder
  5 | from palm.encoders import TextEncoder
  6 | 
  7 | 
  8 | class PromptLearner(nn.Module):
  9 |     def __init__(self, args):
 10 |         super().__init__()
 11 | 
 12 |         self.args = args
 13 |         classnames = args.classnames
 14 |         n_cls = len(classnames)
 15 | 
 16 |         ctx_dim = args.ctx_dim 
 17 | 
 18 |         print("Initializing a generic context")
 19 |         ctx = torch.empty(n_cls, ctx_dim)
 20 |         torch.nn.init.normal_(ctx, std=0.02)
 21 |         self.ctx = torch.nn.Parameter(ctx)
 22 | 
 23 | 
 24 |         self.n_cls = n_cls
 25 |         self.lambdas = nn.Parameter(torch.rand(n_cls))
 26 | 
 27 | 
 28 |     def forward(self, audio_features, text_features):
 29 | 
 30 |         lambdas = torch.sigmoid(self.lambdas).reshape(-1,1)  # [n_cls, 1]
 31 |         
 32 |         updated_text_features = (1-lambdas)*text_features + (lambdas*self.ctx)     # [n_text_prompts, 1024]
 33 |         updated_text_features = updated_text_features / updated_text_features.norm(dim=-1, keepdim=True)
 34 | 
 35 |         return updated_text_features
 36 | 
 37 | class CustomPENGI(nn.Module):
 38 |     def __init__(self,args,pengi):
 39 |         super().__init__()
 40 | 
 41 |         self.args = args
 42 |         pengi_args  = pengi.args
 43 |         self.pengi_args = pengi_args
 44 | 
 45 |         self.audio_encoder = AudioEncoder(
 46 |                     pengi_args.audioenc_name, pengi_args.out_emb, pengi_args.d_proj,
 47 |                     pengi_args.sampling_rate, pengi_args.window_size, pengi_args.hop_size, pengi_args.mel_bins, pengi_args.fmin, pengi_args.fmax, pengi_args.classes_num, 
 48 |                     pengi_args.specaug, pengi_args.mixup, pengi_args.use_pretrained_audioencoder, pengi_args.freeze_audio_encoder_weights,
 49 |                     pengi_args.use_precomputed_melspec, pengi_args.pretrained_audioencoder_path)
 50 | 
 51 |         self.text_encoder = TextEncoder(
 52 |                     pengi_args.d_proj, 
 53 |                     pengi_args.text_model, pengi_args.transformer_embed_dim,
 54 |                     pengi_args.freeze_text_encoder_weights)
 55 | 
 56 | 
 57 |         # load the weights of the pengi pre-trained audio and text encoders
 58 |         print("\n\nPALM: loading the weights of the pengi pre-trained audio and text encoders ...\n\n")
 59 |         self.audio_encoder.load_state_dict(pengi.model.audio_encoder.state_dict())
 60 |         self.text_encoder.load_state_dict(pengi.model.caption_encoder.state_dict())
 61 | 
 62 |         self.audio_encoder.eval()
 63 |         self.text_encoder.eval()
 64 | 
 65 |         self.prompt_learner = PromptLearner(args)
 66 | 
 67 |         self.process_text = pengi.preprocess_text
 68 |         self.device = args.device
 69 | 
 70 | 
 71 |     def forward(self, audio):
 72 |         # Scores every input audio against every class name: returns logit_scale * audio_features @ text_features.T (see shape comments below).
 73 |         audio_features = self.audio_encoder(audio)[0] # audio_features shape [n_audio_files, 1024]
 74 |         audio_features = audio_features / audio_features.norm(dim=-1, keepdim=True)
 75 |         # audio_features has now been divided by its norm along the last dimension.
 76 |         # The text prompts are the bare class names from self.args.classnames; this method adds no template text around them.
 77 |         prompts = [f"{class_name}" for class_name in self.args.classnames]
 78 |         tokenized_prompts = self.process_text(prompts, enc_tok=True, add_text=False)
 79 |         prompts_tokens = tokenized_prompts['input_ids'].to(self.device) 
 80 |         prompts_attention_mask = tokenized_prompts['attention_mask'].to(self.device)
 81 |         # NOTE(review): the meaning of enc_tok/add_text is defined by the process_text callable, not visible here -- confirm. The embedding lookup below is not gradient-tracked.
 82 |         with torch.no_grad():
 83 |             prompts_token_embeddings = self.text_encoder.base.embeddings.token_embedding(prompts_tokens)   # [batch_size, seq_length, embed_dim]
 84 |         # The text encoder is handed the token ids, the pre-computed token embeddings and the attention mask in one dict.
 85 |         text = {"input_ids": prompts_tokens, "inputs_embeds": prompts_token_embeddings, "attention_mask": prompts_attention_mask}
 86 |         text_features = self.text_encoder(text) # text_features shape [n_text_prompts, 1024]
 87 |         text_features = text_features / text_features.norm(dim=-1, keepdim=True)
 88 |         # text_features is normalized here, i.e. before the prompt learner sees it.
 89 |         # The prompt learner receives both feature sets and its return value replaces text_features; this method does not re-normalize that result.
 90 |         text_features = self.prompt_learner(audio_features, text_features) # text_features shape [n_text_prompts, 1024]
 91 |         # Fixed (non-learned) scale; by operator precedence it multiplies audio_features before the matrix product with text_features.t().
 92 | 
 93 |         logit_scale = 100.0
 94 |         logits = logit_scale * audio_features @ text_features.t()  # logits shape [n_audio_files, n_text_prompts]
 95 |         # breakpoint()
 96 | 
 97 |         return logits
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-Actions-FOLD1-SEED0.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ##############################################
 4 | PALM: Prompt Learning in Audio Language Models
 5 | ##############################################
 6 | 
 7 | 
 8 | 
 9 | 
10 | Model:    PENGI_ZEROSHOT
11 | Dataset:  ESC50-Actions
12 | Seed:     0
13 | 
14 | 
15 | Creating a 16-shot dataset ...
16 | 
17 | 
18 | ################## Dataset Information ##################
19 | FewShot Dataset
20 | 
21 | Root: REDACTED/Audio-Datasets/ESC50-Actions
22 | 
23 | Number of Classes: 10
24 | 
25 | Number of Shots: 16
26 | 
27 | Total Number of Samples: 160
28 | 
29 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring']
30 | 
31 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'}
32 | 
33 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9}
34 | ########################################################
35 | 
36 | 
37 | 
38 | 
39 | ################## Dataset Information ##################
40 | FewShot Dataset
41 | 
42 | Root: REDACTED/Audio-Datasets/ESC50-Actions
43 | 
44 | Number of Classes: 10
45 | 
46 | Number of Shots: -1
47 | 
48 | Total Number of Samples: 80
49 | 
50 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring']
51 | 
52 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'}
53 | 
54 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9}
55 | ########################################################
56 | 
57 | 
58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
59 |   warnings.warn(
60 | 
61 | 
62 | ################## Zero-Shot PENGI Information ##################
63 | Prompt Prefix:  The is a recording of 
64 | Prompts:  ['The is a recording of  breathing.', 'The is a recording of  brushing teeth.', 'The is a recording of  clapping.', 'The is a recording of  coughing.', 'The is a recording of  crying baby.', 'The is a recording of  drinking sipping.', 'The is a recording of  footsteps.', 'The is a recording of  laughing.', 'The is a recording of  sneezing.', 'The is a recording of  snoring.']
65 | ###################################################################
66 | 
67 | 
68 | args:  Namespace(batch_size=16, classnames=['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/ESC50-Actions', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='ESC50-Actions-FOLD1', freq_test_model=10, json_file_path='logs/pengi_zeroshot/ESC50-Actions-FOLD1.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7f30a54a8e80>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/ESC50-Actions', seed=0, spec_aug=False, start_epoch=0)
69 | 
70 | 
71 | Evaluating the model ...
72 | 
73 |   0%|          | 0/1 [00:00<?, ?it/s]
74 | 100%|##########| 1/1 [00:01<00:00,  1.20s/it]
75 | 100%|##########| 1/1 [00:01<00:00,  1.36s/it]
76 | 
77 | 
78 | Time & Date = 10:43 AM , 10_Jun_2024  GST
79 | 
80 | Total Time => 0 Hours : 0 Minutes : 1 Seconds
81 | 
82 | 
83 | Accuracy             = 0.7000
84 | F1-Score             = 0.6518
85 | Precision            = 0.7361
86 | Recall               = 0.7000
87 | Average Loss         = 1.0603
88 | 
89 | 
90 | Saving Results ...
91 | Results Saved
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-Actions-FOLD2-SEED0.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ##############################################
 4 | PALM: Prompt Learning in Audio Language Models
 5 | ##############################################
 6 | 
 7 | 
 8 | 
 9 | 
10 | Model:    PENGI_ZEROSHOT
11 | Dataset:  ESC50-Actions
12 | Seed:     0
13 | 
14 | 
15 | Creating a 16-shot dataset ...
16 | 
17 | 
18 | ################## Dataset Information ##################
19 | FewShot Dataset
20 | 
21 | Root: REDACTED/Audio-Datasets/ESC50-Actions
22 | 
23 | Number of Classes: 10
24 | 
25 | Number of Shots: 16
26 | 
27 | Total Number of Samples: 160
28 | 
29 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring']
30 | 
31 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'}
32 | 
33 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9}
34 | ########################################################
35 | 
36 | 
37 | 
38 | 
39 | ################## Dataset Information ##################
40 | FewShot Dataset
41 | 
42 | Root: REDACTED/Audio-Datasets/ESC50-Actions
43 | 
44 | Number of Classes: 10
45 | 
46 | Number of Shots: -1
47 | 
48 | Total Number of Samples: 80
49 | 
50 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring']
51 | 
52 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'}
53 | 
54 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9}
55 | ########################################################
56 | 
57 | 
58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
59 |   warnings.warn(
60 | 
61 | 
62 | ################## Zero-Shot PENGI Information ##################
63 | Prompt Prefix:  The is a recording of 
64 | Prompts:  ['The is a recording of  breathing.', 'The is a recording of  brushing teeth.', 'The is a recording of  clapping.', 'The is a recording of  coughing.', 'The is a recording of  crying baby.', 'The is a recording of  drinking sipping.', 'The is a recording of  footsteps.', 'The is a recording of  laughing.', 'The is a recording of  sneezing.', 'The is a recording of  snoring.']
65 | ###################################################################
66 | 
67 | 
68 | args:  Namespace(batch_size=16, classnames=['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/ESC50-Actions', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='ESC50-Actions-FOLD2', freq_test_model=10, json_file_path='logs/pengi_zeroshot/ESC50-Actions-FOLD2.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7fa3067c1e80>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/ESC50-Actions', seed=0, spec_aug=False, start_epoch=0)
69 | 
70 | 
71 | Evaluating the model ...
72 | 
73 |   0%|          | 0/1 [00:00<?, ?it/s]
74 | 100%|##########| 1/1 [00:01<00:00,  1.11s/it]
75 | 100%|##########| 1/1 [00:01<00:00,  1.33s/it]
76 | 
77 | 
78 | Time & Date = 10:44 AM , 10_Jun_2024  GST
79 | 
80 | Total Time => 0 Hours : 0 Minutes : 1 Seconds
81 | 
82 | 
83 | Accuracy             = 0.6375
84 | F1-Score             = 0.5951
85 | Precision            = 0.7354
86 | Recall               = 0.6375
87 | Average Loss         = 1.1233
88 | 
89 | 
90 | Saving Results ...
91 | Results Saved
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-Actions-FOLD3-SEED0.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ##############################################
 4 | PALM: Prompt Learning in Audio Language Models
 5 | ##############################################
 6 | 
 7 | 
 8 | 
 9 | 
10 | Model:    PENGI_ZEROSHOT
11 | Dataset:  ESC50-Actions
12 | Seed:     0
13 | 
14 | 
15 | Creating a 16-shot dataset ...
16 | 
17 | 
18 | ################## Dataset Information ##################
19 | FewShot Dataset
20 | 
21 | Root: REDACTED/Audio-Datasets/ESC50-Actions
22 | 
23 | Number of Classes: 10
24 | 
25 | Number of Shots: 16
26 | 
27 | Total Number of Samples: 160
28 | 
29 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring']
30 | 
31 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'}
32 | 
33 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9}
34 | ########################################################
35 | 
36 | 
37 | 
38 | 
39 | ################## Dataset Information ##################
40 | FewShot Dataset
41 | 
42 | Root: REDACTED/Audio-Datasets/ESC50-Actions
43 | 
44 | Number of Classes: 10
45 | 
46 | Number of Shots: -1
47 | 
48 | Total Number of Samples: 80
49 | 
50 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring']
51 | 
52 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'}
53 | 
54 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9}
55 | ########################################################
56 | 
57 | 
58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
59 |   warnings.warn(
60 | 
61 | 
62 | ################## Zero-Shot PENGI Information ##################
63 | Prompt Prefix:  The is a recording of 
64 | Prompts:  ['The is a recording of  breathing.', 'The is a recording of  brushing teeth.', 'The is a recording of  clapping.', 'The is a recording of  coughing.', 'The is a recording of  crying baby.', 'The is a recording of  drinking sipping.', 'The is a recording of  footsteps.', 'The is a recording of  laughing.', 'The is a recording of  sneezing.', 'The is a recording of  snoring.']
65 | ###################################################################
66 | 
67 | 
68 | args:  Namespace(batch_size=16, classnames=['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/ESC50-Actions', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='ESC50-Actions-FOLD3', freq_test_model=10, json_file_path='logs/pengi_zeroshot/ESC50-Actions-FOLD3.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7ffacbd84e80>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/ESC50-Actions', seed=0, spec_aug=False, start_epoch=0)
69 | 
70 | 
71 | Evaluating the model ...
72 | 
73 |   0%|          | 0/1 [00:00<?, ?it/s]
74 | 100%|##########| 1/1 [00:01<00:00,  1.10s/it]
75 | 100%|##########| 1/1 [00:01<00:00,  1.26s/it]
76 | 
77 | 
78 | Time & Date = 10:44 AM , 10_Jun_2024  GST
79 | 
80 | Total Time => 0 Hours : 0 Minutes : 1 Seconds
81 | 
82 | 
83 | Accuracy             = 0.6750
84 | F1-Score             = 0.6251
85 | Precision            = 0.6599
86 | Recall               = 0.6750
87 | Average Loss         = 0.8852
88 | 
89 | 
90 | Saving Results ...
91 | Results Saved
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/logs/zeroshot/ESC50-Actions-FOLD5-SEED0.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ##############################################
 4 | PALM: Prompt Learning in Audio Language Models
 5 | ##############################################
 6 | 
 7 | 
 8 | 
 9 | 
10 | Model:    PENGI_ZEROSHOT
11 | Dataset:  ESC50-Actions
12 | Seed:     0
13 | 
14 | 
15 | Creating a 16-shot dataset ...
16 | 
17 | 
18 | ################## Dataset Information ##################
19 | FewShot Dataset
20 | 
21 | Root: REDACTED/Audio-Datasets/ESC50-Actions
22 | 
23 | Number of Classes: 10
24 | 
25 | Number of Shots: 16
26 | 
27 | Total Number of Samples: 160
28 | 
29 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring']
30 | 
31 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'}
32 | 
33 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9}
34 | ########################################################
35 | 
36 | 
37 | 
38 | 
39 | ################## Dataset Information ##################
40 | FewShot Dataset
41 | 
42 | Root: REDACTED/Audio-Datasets/ESC50-Actions
43 | 
44 | Number of Classes: 10
45 | 
46 | Number of Shots: -1
47 | 
48 | Total Number of Samples: 80
49 | 
50 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring']
51 | 
52 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'}
53 | 
54 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9}
55 | ########################################################
56 | 
57 | 
58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
59 |   warnings.warn(
60 | 
61 | 
62 | ################## Zero-Shot PENGI Information ##################
63 | Prompt Prefix:  The is a recording of 
64 | Prompts:  ['The is a recording of  breathing.', 'The is a recording of  brushing teeth.', 'The is a recording of  clapping.', 'The is a recording of  coughing.', 'The is a recording of  crying baby.', 'The is a recording of  drinking sipping.', 'The is a recording of  footsteps.', 'The is a recording of  laughing.', 'The is a recording of  sneezing.', 'The is a recording of  snoring.']
65 | ###################################################################
66 | 
67 | 
68 | args:  Namespace(batch_size=16, classnames=['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/ESC50-Actions', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='ESC50-Actions-FOLD5', freq_test_model=10, json_file_path='logs/pengi_zeroshot/ESC50-Actions-FOLD5.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7f72c871ee80>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/ESC50-Actions', seed=0, spec_aug=False, start_epoch=0)
69 | 
70 | 
71 | Evaluating the model ...
72 | 
73 |   0%|          | 0/1 [00:00<?, ?it/s]
74 | 100%|##########| 1/1 [00:01<00:00,  1.09s/it]
75 | 100%|##########| 1/1 [00:01<00:00,  1.31s/it]
76 | 
77 | 
78 | Time & Date = 10:46 AM , 10_Jun_2024  GST
79 | 
80 | Total Time => 0 Hours : 0 Minutes : 1 Seconds
81 | 
82 | 
83 | Accuracy             = 0.6375
84 | F1-Score             = 0.6191
85 | Precision            = 0.6751
86 | Recall               = 0.6375
87 | Average Loss         = 1.0817
88 | 
89 | 
90 | Saving Results ...
91 | Results Saved
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/logs/zeroshot/Beijing-Opera-FOLD1-SEED0.log:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ##############################################
  4 | PALM: Prompt Learning in Audio Language Models
  5 | ##############################################
  6 | 
  7 | 
  8 | 
  9 | 
 10 | Model:    PENGI_ZEROSHOT
 11 | Dataset:  Beijing-Opera
 12 | Seed:     0
 13 | 
 14 | 
 15 | Creating a 16-shot dataset ...
 16 | 
 17 | 
 18 | ################## Dataset Information ##################
 19 | FewShot Dataset
 20 | 
 21 | Root: REDACTED/Audio-Datasets/Beijing-Opera
 22 | 
 23 | Number of Classes: 4
 24 | 
 25 | Number of Shots: 16
 26 | 
 27 | Total Number of Samples: 64
 28 | 
 29 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo']
 30 | 
 31 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'}
 32 | 
 33 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3}
 34 | ########################################################
 35 | 
 36 | 
 37 | 
 38 | 
 39 | ################## Dataset Information ##################
 40 | FewShot Dataset
 41 | 
 42 | Root: REDACTED/Audio-Datasets/Beijing-Opera
 43 | 
 44 | Number of Classes: 4
 45 | 
 46 | Number of Shots: -1
 47 | 
 48 | Total Number of Samples: 48
 49 | 
 50 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo']
 51 | 
 52 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'}
 53 | 
 54 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3}
 55 | ########################################################
 56 | 
 57 | 
 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
 59 |   warnings.warn(
 60 | 
 61 | 
 62 | ################## Zero-Shot PENGI Information ##################
 63 | Prompt Prefix:  The is a recording of 
 64 | Prompts:  ['The is a recording of  bangu.', 'The is a recording of  daluo.', 'The is a recording of  naobo.', 'The is a recording of  xiaoluo.']
 65 | ###################################################################
 66 | 
 67 | 
 68 | args:  Namespace(batch_size=16, classnames=['bangu', 'daluo', 'naobo', 'xiaoluo'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/Beijing-Opera', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='Beijing-Opera-FOLD1', freq_test_model=10, json_file_path='logs/pengi_zeroshot/Beijing-Opera-FOLD1.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7f920a7b6e50>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/Beijing-Opera', seed=0, spec_aug=False, start_epoch=0)
 69 | 
 70 | 
 71 | Evaluating the model ...
 72 | 
 73 |   0%|          | 0/1 [00:00<?, ?it/s]
 74 | 100%|##########| 1/1 [00:01<00:00,  1.17s/it]
 75 | 100%|##########| 1/1 [00:01<00:00,  1.36s/it]
 76 | 
 77 | 
 78 | Time & Date = 10:42 AM , 10_Jun_2024  GST
 79 | 
 80 | Total Time => 0 Hours : 0 Minutes : 1 Seconds
 81 | 
 82 | 
 83 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 84 |   _warn_prf(average, modifier, msg_start, len(result))
 85 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 86 |   _warn_prf(average, modifier, msg_start, len(result))
 87 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 88 |   _warn_prf(average, modifier, msg_start, len(result))
 89 | Accuracy             = 0.2917
 90 | F1-Score             = 0.1468
 91 | Precision            = 0.3191
 92 | Recall               = 0.2708
 93 | Average Loss         = 4.0296
 94 | 
 95 | 
 96 | Saving Results ...
 97 | Results Saved
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/logs/zeroshot/Beijing-Opera-FOLD2-SEED0.log:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ##############################################
  4 | PALM: Prompt Learning in Audio Language Models
  5 | ##############################################
  6 | 
  7 | 
  8 | 
  9 | 
 10 | Model:    PENGI_ZEROSHOT
 11 | Dataset:  Beijing-Opera
 12 | Seed:     0
 13 | 
 14 | 
 15 | Creating a 16-shot dataset ...
 16 | 
 17 | 
 18 | ################## Dataset Information ##################
 19 | FewShot Dataset
 20 | 
 21 | Root: REDACTED/Audio-Datasets/Beijing-Opera
 22 | 
 23 | Number of Classes: 4
 24 | 
 25 | Number of Shots: 16
 26 | 
 27 | Total Number of Samples: 64
 28 | 
 29 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo']
 30 | 
 31 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'}
 32 | 
 33 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3}
 34 | ########################################################
 35 | 
 36 | 
 37 | 
 38 | 
 39 | ################## Dataset Information ##################
 40 | FewShot Dataset
 41 | 
 42 | Root: REDACTED/Audio-Datasets/Beijing-Opera
 43 | 
 44 | Number of Classes: 4
 45 | 
 46 | Number of Shots: -1
 47 | 
 48 | Total Number of Samples: 47
 49 | 
 50 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo']
 51 | 
 52 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'}
 53 | 
 54 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3}
 55 | ########################################################
 56 | 
 57 | 
 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
 59 |   warnings.warn(
 60 | 
 61 | 
 62 | ################## Zero-Shot PENGI Information ##################
 63 | Prompt Prefix:  The is a recording of 
 64 | Prompts:  ['The is a recording of  bangu.', 'The is a recording of  daluo.', 'The is a recording of  naobo.', 'The is a recording of  xiaoluo.']
 65 | ###################################################################
 66 | 
 67 | 
 68 | args:  Namespace(batch_size=16, classnames=['bangu', 'daluo', 'naobo', 'xiaoluo'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/Beijing-Opera', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='Beijing-Opera-FOLD2', freq_test_model=10, json_file_path='logs/pengi_zeroshot/Beijing-Opera-FOLD2.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7f8928963e50>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/Beijing-Opera', seed=0, spec_aug=False, start_epoch=0)
 69 | 
 70 | 
 71 | Evaluating the model ...
 72 | 
 73 |   0%|          | 0/1 [00:00<?, ?it/s]
 74 | 100%|##########| 1/1 [00:00<00:00,  1.56it/s]
 75 | 100%|##########| 1/1 [00:00<00:00,  1.16it/s]
 76 | 
 77 | 
 78 | Time & Date = 10:43 AM , 10_Jun_2024  GST
 79 | 
 80 | Total Time => 0 Hours : 0 Minutes : 1 Seconds
 81 | 
 82 | 
 83 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 84 |   _warn_prf(average, modifier, msg_start, len(result))
 85 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 86 |   _warn_prf(average, modifier, msg_start, len(result))
 87 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 88 |   _warn_prf(average, modifier, msg_start, len(result))
 89 | Accuracy             = 0.2766
 90 | F1-Score             = 0.1083
 91 | Precision            = 0.0691
 92 | Recall               = 0.2500
 93 | Average Loss         = 3.7753
 94 | 
 95 | 
 96 | Saving Results ...
 97 | Results Saved
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/logs/zeroshot/Beijing-Opera-FOLD3-SEED0.log:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ##############################################
  4 | PALM: Prompt Learning in Audio Language Models
  5 | ##############################################
  6 | 
  7 | 
  8 | 
  9 | 
 10 | Model:    PENGI_ZEROSHOT
 11 | Dataset:  Beijing-Opera
 12 | Seed:     0
 13 | 
 14 | 
 15 | Creating a 16-shot dataset ...
 16 | 
 17 | 
 18 | ################## Dataset Information ##################
 19 | FewShot Dataset
 20 | 
 21 | Root: REDACTED/Audio-Datasets/Beijing-Opera
 22 | 
 23 | Number of Classes: 4
 24 | 
 25 | Number of Shots: 16
 26 | 
 27 | Total Number of Samples: 64
 28 | 
 29 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo']
 30 | 
 31 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'}
 32 | 
 33 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3}
 34 | ########################################################
 35 | 
 36 | 
 37 | 
 38 | 
 39 | ################## Dataset Information ##################
 40 | FewShot Dataset
 41 | 
 42 | Root: REDACTED/Audio-Datasets/Beijing-Opera
 43 | 
 44 | Number of Classes: 4
 45 | 
 46 | Number of Shots: -1
 47 | 
 48 | Total Number of Samples: 47
 49 | 
 50 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo']
 51 | 
 52 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'}
 53 | 
 54 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3}
 55 | ########################################################
 56 | 
 57 | 
 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
 59 |   warnings.warn(
 60 | 
 61 | 
 62 | ################## Zero-Shot PENGI Information ##################
 63 | Prompt Prefix:  The is a recording of 
 64 | Prompts:  ['The is a recording of  bangu.', 'The is a recording of  daluo.', 'The is a recording of  naobo.', 'The is a recording of  xiaoluo.']
 65 | ###################################################################
 66 | 
 67 | 
 68 | args:  Namespace(batch_size=16, classnames=['bangu', 'daluo', 'naobo', 'xiaoluo'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/Beijing-Opera', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='Beijing-Opera-FOLD3', freq_test_model=10, json_file_path='logs/pengi_zeroshot/Beijing-Opera-FOLD3.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7f1917b81e50>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/Beijing-Opera', seed=0, spec_aug=False, start_epoch=0)
 69 | 
 70 | 
 71 | Evaluating the model ...
 72 | 
 73 |   0%|          | 0/1 [00:00<?, ?it/s]
 74 | 100%|##########| 1/1 [00:00<00:00,  1.56it/s]
 75 | 100%|##########| 1/1 [00:00<00:00,  1.20it/s]
 76 | 
 77 | 
 78 | Time & Date = 10:44 AM , 10_Jun_2024  GST
 79 | 
 80 | Total Time => 0 Hours : 0 Minutes : 1 Seconds
 81 | 
 82 | 
 83 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 84 |   _warn_prf(average, modifier, msg_start, len(result))
 85 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 86 |   _warn_prf(average, modifier, msg_start, len(result))
 87 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 88 |   _warn_prf(average, modifier, msg_start, len(result))
 89 | Accuracy             = 0.2766
 90 | F1-Score             = 0.1083
 91 | Precision            = 0.0691
 92 | Recall               = 0.2500
 93 | Average Loss         = 3.8906
 94 | 
 95 | 
 96 | Saving Results ...
 97 | Results Saved
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/logs/zeroshot/Beijing-Opera-FOLD4-SEED0.log:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ##############################################
  4 | PALM: Prompt Learning in Audio Language Models
  5 | ##############################################
  6 | 
  7 | 
  8 | 
  9 | 
 10 | Model:    PENGI_ZEROSHOT
 11 | Dataset:  Beijing-Opera
 12 | Seed:     0
 13 | 
 14 | 
 15 | Creating a 16-shot dataset ...
 16 | 
 17 | 
 18 | ################## Dataset Information ##################
 19 | FewShot Dataset
 20 | 
 21 | Root: REDACTED/Audio-Datasets/Beijing-Opera
 22 | 
 23 | Number of Classes: 4
 24 | 
 25 | Number of Shots: 16
 26 | 
 27 | Total Number of Samples: 64
 28 | 
 29 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo']
 30 | 
 31 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'}
 32 | 
 33 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3}
 34 | ########################################################
 35 | 
 36 | 
 37 | 
 38 | 
 39 | ################## Dataset Information ##################
 40 | FewShot Dataset
 41 | 
 42 | Root: REDACTED/Audio-Datasets/Beijing-Opera
 43 | 
 44 | Number of Classes: 4
 45 | 
 46 | Number of Shots: -1
 47 | 
 48 | Total Number of Samples: 47
 49 | 
 50 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo']
 51 | 
 52 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'}
 53 | 
 54 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3}
 55 | ########################################################
 56 | 
 57 | 
 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
 59 |   warnings.warn(
 60 | 
 61 | 
 62 | ################## Zero-Shot PENGI Information ##################
 63 | Prompt Prefix:  The is a recording of 
 64 | Prompts:  ['The is a recording of  bangu.', 'The is a recording of  daluo.', 'The is a recording of  naobo.', 'The is a recording of  xiaoluo.']
 65 | ###################################################################
 66 | 
 67 | 
 68 | args:  Namespace(batch_size=16, classnames=['bangu', 'daluo', 'naobo', 'xiaoluo'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/Beijing-Opera', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='Beijing-Opera-FOLD4', freq_test_model=10, json_file_path='logs/pengi_zeroshot/Beijing-Opera-FOLD4.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7fc368442e50>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/Beijing-Opera', seed=0, spec_aug=False, start_epoch=0)
 69 | 
 70 | 
 71 | Evaluating the model ...
 72 | 
 73 |   0%|          | 0/1 [00:00<?, ?it/s]
 74 | 100%|##########| 1/1 [00:00<00:00,  1.61it/s]
 75 | 100%|##########| 1/1 [00:00<00:00,  1.18it/s]
 76 | 
 77 | 
 78 | Time & Date = 10:44 AM , 10_Jun_2024  GST
 79 | 
 80 | Total Time => 0 Hours : 0 Minutes : 1 Seconds
 81 | 
 82 | 
 83 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 84 |   _warn_prf(average, modifier, msg_start, len(result))
 85 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 86 |   _warn_prf(average, modifier, msg_start, len(result))
 87 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 88 |   _warn_prf(average, modifier, msg_start, len(result))
 89 | Accuracy             = 0.2766
 90 | F1-Score             = 0.1083
 91 | Precision            = 0.0691
 92 | Recall               = 0.2500
 93 | Average Loss         = 3.9215
 94 | 
 95 | 
 96 | Saving Results ...
 97 | Results Saved
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/logs/zeroshot/Beijing-Opera-FOLD5-SEED0.log:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ##############################################
  4 | PALM: Prompt Learning in Audio Language Models
  5 | ##############################################
  6 | 
  7 | 
  8 | 
  9 | 
 10 | Model:    PENGI_ZEROSHOT
 11 | Dataset:  Beijing-Opera
 12 | Seed:     0
 13 | 
 14 | 
 15 | Creating a 16-shot dataset ...
 16 | 
 17 | 
 18 | ################## Dataset Information ##################
 19 | FewShot Dataset
 20 | 
 21 | Root: REDACTED/Audio-Datasets/Beijing-Opera
 22 | 
 23 | Number of Classes: 4
 24 | 
 25 | Number of Shots: 16
 26 | 
 27 | Total Number of Samples: 64
 28 | 
 29 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo']
 30 | 
 31 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'}
 32 | 
 33 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3}
 34 | ########################################################
 35 | 
 36 | 
 37 | 
 38 | 
 39 | ################## Dataset Information ##################
 40 | FewShot Dataset
 41 | 
 42 | Root: REDACTED/Audio-Datasets/Beijing-Opera
 43 | 
 44 | Number of Classes: 4
 45 | 
 46 | Number of Shots: -1
 47 | 
 48 | Total Number of Samples: 47
 49 | 
 50 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo']
 51 | 
 52 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'}
 53 | 
 54 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3}
 55 | ########################################################
 56 | 
 57 | 
 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
 59 |   warnings.warn(
 60 | 
 61 | 
 62 | ################## Zero-Shot PENGI Information ##################
 63 | Prompt Prefix:  The is a recording of 
 64 | Prompts:  ['The is a recording of  bangu.', 'The is a recording of  daluo.', 'The is a recording of  naobo.', 'The is a recording of  xiaoluo.']
 65 | ###################################################################
 66 | 
 67 | 
 68 | args:  Namespace(batch_size=16, classnames=['bangu', 'daluo', 'naobo', 'xiaoluo'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/Beijing-Opera', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='Beijing-Opera-FOLD5', freq_test_model=10, json_file_path='logs/pengi_zeroshot/Beijing-Opera-FOLD5.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7fcb8b746e50>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/Beijing-Opera', seed=0, spec_aug=False, start_epoch=0)
 69 | 
 70 | 
 71 | Evaluating the model ...
 72 | 
 73 |   0%|          | 0/1 [00:00<?, ?it/s]
 74 | 100%|##########| 1/1 [00:00<00:00,  1.74it/s]
 75 | 100%|##########| 1/1 [00:00<00:00,  1.29it/s]
 76 | 
 77 | 
 78 | Time & Date = 10:45 AM , 10_Jun_2024  GST
 79 | 
 80 | Total Time => 0 Hours : 0 Minutes : 1 Seconds
 81 | 
 82 | 
 83 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 84 |   _warn_prf(average, modifier, msg_start, len(result))
 85 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 86 |   _warn_prf(average, modifier, msg_start, len(result))
 87 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
 88 |   _warn_prf(average, modifier, msg_start, len(result))
 89 | Accuracy             = 0.3191
 90 | F1-Score             = 0.1890
 91 | Precision            = 0.3222
 92 | Recall               = 0.2955
 93 | Average Loss         = 3.6894
 94 | 
 95 | 
 96 | Saving Results ...
 97 | Results Saved
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD1-SEED0.log:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ##############################################
  4 | PALM: Prompt Learning in Audio Language Models
  5 | ##############################################
  6 | 
  7 | 
  8 | 
  9 | 
 10 | Model:    PENGI_ZEROSHOT
 11 | Dataset:  UrbanSound8K
 12 | Seed:     0
 13 | 
 14 | 
 15 | Creating a 16-shot dataset ...
 16 | 
 17 | 
 18 | ################## Dataset Information ##################
 19 | FewShot Dataset
 20 | 
 21 | Root: REDACTED/Audio-Datasets/UrbanSound8K
 22 | 
 23 | Number of Classes: 10
 24 | 
 25 | Number of Shots: 16
 26 | 
 27 | Total Number of Samples: 160
 28 | 
 29 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music']
 30 | 
 31 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'}
 32 | 
 33 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9}
 34 | ########################################################
 35 | 
 36 | 
 37 | 
 38 | 
 39 | ################## Dataset Information ##################
 40 | FewShot Dataset
 41 | 
 42 | Root: REDACTED/Audio-Datasets/UrbanSound8K
 43 | 
 44 | Number of Classes: 10
 45 | 
 46 | Number of Shots: -1
 47 | 
 48 | Total Number of Samples: 873
 49 | 
 50 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music']
 51 | 
 52 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'}
 53 | 
 54 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9}
 55 | ########################################################
 56 | 
 57 | 
 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
 59 |   warnings.warn(
 60 | 
 61 | 
 62 | ################## Zero-Shot PENGI Information ##################
 63 | Prompt Prefix:  The is a recording of 
 64 | Prompts:  ['The is a recording of  air conditioner.', 'The is a recording of  car horn.', 'The is a recording of  children playing.', 'The is a recording of  dog bark.', 'The is a recording of  drilling.', 'The is a recording of  engine idling.', 'The is a recording of  gun shot.', 'The is a recording of  jackhammer.', 'The is a recording of  siren.', 'The is a recording of  street music.']
 65 | ###################################################################
 66 | 
 67 | 
 68 | args:  Namespace(batch_size=16, classnames=['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/UrbanSound8K', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='UrbanSound8K-FOLD1', freq_test_model=10, json_file_path='logs/pengi_zeroshot/UrbanSound8K-FOLD1.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7f61a3f60e50>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/UrbanSound8K', seed=0, spec_aug=False, start_epoch=0)
 69 | 
 70 | 
 71 | Evaluating the model ...
 72 | 
 73 |   0%|          | 0/7 [00:00<?, ?it/s]
 74 |  14%|#4        | 1/7 [00:03<00:22,  3.83s/it]
 75 |  29%|##8       | 2/7 [00:04<00:08,  1.68s/it]
 76 |  43%|####2     | 3/7 [00:04<00:04,  1.02s/it]
 77 |  57%|#####7    | 4/7 [00:04<00:02,  1.35it/s]
 78 |  71%|#######1  | 5/7 [00:07<00:03,  1.52s/it]
 79 |  86%|########5 | 6/7 [00:07<00:01,  1.07s/it]
 80 | 100%|##########| 7/7 [00:07<00:00,  1.28it/s]
 81 | 100%|##########| 7/7 [00:08<00:00,  1.15s/it]
 82 | 
 83 | 
 84 | Time & Date = 10:14 PM , 09_Jun_2024
 85 | 
 86 | Total Time => 0 Hours : 0 Minutes : 8 Seconds
 87 | 
 88 | 
 89 | Accuracy             = 0.5487
 90 | F1-Score             = 0.5236
 91 | Precision            = 0.5844
 92 | Recall               = 0.5538
 93 | Average Loss         = 1.6837
 94 | 
 95 | 
 96 | Saving Results ...
 97 | Results Saved
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD10-SEED0.log:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ##############################################
  4 | PALM: Prompt Learning in Audio Language Models
  5 | ##############################################
  6 | 
  7 | 
  8 | 
  9 | 
 10 | Model:    PENGI_ZEROSHOT
 11 | Dataset:  UrbanSound8K
 12 | Seed:     0
 13 | 
 14 | 
 15 | Creating a 16-shot dataset ...
 16 | 
 17 | 
 18 | ################## Dataset Information ##################
 19 | FewShot Dataset
 20 | 
 21 | Root: REDACTED/Audio-Datasets/UrbanSound8K
 22 | 
 23 | Number of Classes: 10
 24 | 
 25 | Number of Shots: 16
 26 | 
 27 | Total Number of Samples: 160
 28 | 
 29 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music']
 30 | 
 31 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'}
 32 | 
 33 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9}
 34 | ########################################################
 35 | 
 36 | 
 37 | 
 38 | 
 39 | ################## Dataset Information ##################
 40 | FewShot Dataset
 41 | 
 42 | Root: REDACTED/Audio-Datasets/UrbanSound8K
 43 | 
 44 | Number of Classes: 10
 45 | 
 46 | Number of Shots: -1
 47 | 
 48 | Total Number of Samples: 837
 49 | 
 50 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music']
 51 | 
 52 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'}
 53 | 
 54 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9}
 55 | ########################################################
 56 | 
 57 | 
 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
 59 |   warnings.warn(
 60 | 
 61 | 
 62 | ################## Zero-Shot PENGI Information ##################
 63 | Prompt Prefix:  The is a recording of 
 64 | Prompts:  ['The is a recording of  air conditioner.', 'The is a recording of  car horn.', 'The is a recording of  children playing.', 'The is a recording of  dog bark.', 'The is a recording of  drilling.', 'The is a recording of  engine idling.', 'The is a recording of  gun shot.', 'The is a recording of  jackhammer.', 'The is a recording of  siren.', 'The is a recording of  street music.']
 65 | ###################################################################
 66 | 
 67 | 
 68 | args:  Namespace(batch_size=16, classnames=['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/UrbanSound8K', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='UrbanSound8K-FOLD10', freq_test_model=10, json_file_path='logs/pengi_zeroshot/UrbanSound8K-FOLD10.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7f998f93ee50>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/UrbanSound8K', seed=0, spec_aug=False, start_epoch=0)
 69 | 
 70 | 
 71 | Evaluating the model ...
 72 | 
 73 |   0%|          | 0/7 [00:00<?, ?it/s]
 74 |  14%|#4        | 1/7 [00:02<00:17,  2.86s/it]
 75 |  29%|##8       | 2/7 [00:03<00:07,  1.57s/it]
 76 |  43%|####2     | 3/7 [00:03<00:03,  1.04it/s]
 77 |  57%|#####7    | 4/7 [00:03<00:01,  1.54it/s]
 78 |  71%|#######1  | 5/7 [00:04<00:01,  1.37it/s]
 79 |  86%|########5 | 6/7 [00:05<00:00,  1.48it/s]
 80 | 100%|##########| 7/7 [00:05<00:00,  1.98it/s]
 81 | 100%|##########| 7/7 [00:05<00:00,  1.22it/s]
 82 | 
 83 | 
 84 | Time & Date = 10:21 PM , 09_Jun_2024
 85 | 
 86 | Total Time => 0 Hours : 0 Minutes : 6 Seconds
 87 | 
 88 | 
 89 | Accuracy             = 0.5078
 90 | F1-Score             = 0.4733
 91 | Precision            = 0.5256
 92 | Recall               = 0.5120
 93 | Average Loss         = 1.7276
 94 | 
 95 | 
 96 | Saving Results ...
 97 | Results Saved
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD2-SEED0.log:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ##############################################
  4 | PALM: Prompt Learning in Audio Language Models
  5 | ##############################################
  6 | 
  7 | 
  8 | 
  9 | 
 10 | Model:    PENGI_ZEROSHOT
 11 | Dataset:  UrbanSound8K
 12 | Seed:     0
 13 | 
 14 | 
 15 | Creating a 16-shot dataset ...
 16 | 
 17 | 
 18 | ################## Dataset Information ##################
 19 | FewShot Dataset
 20 | 
 21 | Root: REDACTED/Audio-Datasets/UrbanSound8K
 22 | 
 23 | Number of Classes: 10
 24 | 
 25 | Number of Shots: 16
 26 | 
 27 | Total Number of Samples: 160
 28 | 
 29 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music']
 30 | 
 31 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'}
 32 | 
 33 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9}
 34 | ########################################################
 35 | 
 36 | 
 37 | 
 38 | 
 39 | ################## Dataset Information ##################
 40 | FewShot Dataset
 41 | 
 42 | Root: REDACTED/Audio-Datasets/UrbanSound8K
 43 | 
 44 | Number of Classes: 10
 45 | 
 46 | Number of Shots: -1
 47 | 
 48 | Total Number of Samples: 888
 49 | 
 50 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music']
 51 | 
 52 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'}
 53 | 
 54 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9}
 55 | ########################################################
 56 | 
 57 | 
 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
 59 |   warnings.warn(
 60 | 
 61 | 
 62 | ################## Zero-Shot PENGI Information ##################
 63 | Prompt Prefix:  The is a recording of 
 64 | Prompts:  ['The is a recording of  air conditioner.', 'The is a recording of  car horn.', 'The is a recording of  children playing.', 'The is a recording of  dog bark.', 'The is a recording of  drilling.', 'The is a recording of  engine idling.', 'The is a recording of  gun shot.', 'The is a recording of  jackhammer.', 'The is a recording of  siren.', 'The is a recording of  street music.']
 65 | ###################################################################
 66 | 
 67 | 
 68 | args:  Namespace(batch_size=16, classnames=['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/UrbanSound8K', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='UrbanSound8K-FOLD2', freq_test_model=10, json_file_path='logs/pengi_zeroshot/UrbanSound8K-FOLD2.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7f77975a0e50>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/UrbanSound8K', seed=0, spec_aug=False, start_epoch=0)
 69 | 
 70 | 
 71 | Evaluating the model ...
 72 | 
 73 |   0%|          | 0/7 [00:00<?, ?it/s]
 74 |  14%|#4        | 1/7 [00:02<00:16,  2.74s/it]
 75 |  29%|##8       | 2/7 [00:02<00:06,  1.23s/it]
 76 |  43%|####2     | 3/7 [00:03<00:03,  1.29it/s]
 77 |  57%|#####7    | 4/7 [00:03<00:01,  1.60it/s]
 78 |  71%|#######1  | 5/7 [00:04<00:01,  1.36it/s]
 79 |  86%|########5 | 6/7 [00:04<00:00,  1.83it/s]
 80 | 100%|##########| 7/7 [00:04<00:00,  2.21it/s]
 81 | 100%|##########| 7/7 [00:05<00:00,  1.36it/s]
 82 | 
 83 | 
 84 | Time & Date = 10:15 PM , 09_Jun_2024
 85 | 
 86 | Total Time => 0 Hours : 0 Minutes : 5 Seconds
 87 | 
 88 | 
 89 | Accuracy             = 0.5450
 90 | F1-Score             = 0.5010
 91 | Precision            = 0.5382
 92 | Recall               = 0.5059
 93 | Average Loss         = 1.6292
 94 | 
 95 | 
 96 | Saving Results ...
 97 | Results Saved
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/logs/zeroshot/UrbanSound8K-FOLD5-SEED0.log:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ##############################################
  4 | PALM: Prompt Learning in Audio Language Models
  5 | ##############################################
  6 | 
  7 | 
  8 | 
  9 | 
 10 | Model:    PENGI_ZEROSHOT
 11 | Dataset:  UrbanSound8K
 12 | Seed:     0
 13 | 
 14 | 
 15 | Creating a 16-shot dataset ...
 16 | 
 17 | 
 18 | ################## Dataset Information ##################
 19 | FewShot Dataset
 20 | 
 21 | Root: REDACTED/Audio-Datasets/UrbanSound8K
 22 | 
 23 | Number of Classes: 10
 24 | 
 25 | Number of Shots: 16
 26 | 
 27 | Total Number of Samples: 160
 28 | 
 29 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music']
 30 | 
 31 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'}
 32 | 
 33 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9}
 34 | ########################################################
 35 | 
 36 | 
 37 | 
 38 | 
 39 | ################## Dataset Information ##################
 40 | FewShot Dataset
 41 | 
 42 | Root: REDACTED/Audio-Datasets/UrbanSound8K
 43 | 
 44 | Number of Classes: 10
 45 | 
 46 | Number of Shots: -1
 47 | 
 48 | Total Number of Samples: 936
 49 | 
 50 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music']
 51 | 
 52 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'}
 53 | 
 54 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9}
 55 | ########################################################
 56 | 
 57 | 
 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
 59 |   warnings.warn(
 60 | 
 61 | 
 62 | ################## Zero-Shot PENGI Information ##################
 63 | Prompt Prefix:  The is a recording of 
 64 | Prompts:  ['The is a recording of  air conditioner.', 'The is a recording of  car horn.', 'The is a recording of  children playing.', 'The is a recording of  dog bark.', 'The is a recording of  drilling.', 'The is a recording of  engine idling.', 'The is a recording of  gun shot.', 'The is a recording of  jackhammer.', 'The is a recording of  siren.', 'The is a recording of  street music.']
 65 | ###################################################################
 66 | 
 67 | 
 68 | args:  Namespace(batch_size=16, classnames=['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/UrbanSound8K', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='UrbanSound8K-FOLD5', freq_test_model=10, json_file_path='logs/pengi_zeroshot/UrbanSound8K-FOLD5.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=<bound method PengiWrapper.preprocess_audio of <pengi.wrapper.PengiWrapper object at 0x7fceb78e9e50>>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/UrbanSound8K', seed=0, spec_aug=False, start_epoch=0)
 69 | 
 70 | 
 71 | Evaluating the model ...
 72 | 
 73 |   0%|          | 0/8 [00:00<?, ?it/s]
 74 |  12%|#2        | 1/8 [00:01<00:13,  1.97s/it]
 75 |  25%|##5       | 2/8 [00:02<00:06,  1.15s/it]
 76 |  38%|###7      | 3/8 [00:02<00:04,  1.22it/s]
 77 |  50%|#####     | 4/8 [00:03<00:02,  1.52it/s]
 78 |  62%|######2   | 5/8 [00:03<00:01,  1.77it/s]
 79 |  75%|#######5  | 6/8 [00:04<00:01,  1.66it/s]
 80 |  88%|########7 | 7/8 [00:04<00:00,  2.09it/s]
 81 | 100%|##########| 8/8 [00:04<00:00,  1.60it/s]
 82 | 
 83 | 
 84 | Time & Date = 10:17 PM , 09_Jun_2024
 85 | 
 86 | Total Time => 0 Hours : 0 Minutes : 5 Seconds
 87 | 
 88 | 
 89 | Accuracy             = 0.6047
 90 | F1-Score             = 0.5699
 91 | Precision            = 0.6039
 92 | Recall               = 0.6031
 93 | Average Loss         = 1.4961
 94 | 
 95 | 
 96 | Saving Results ...
 97 | Results Saved
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------