├── .gitignore ├── media ├── palm.png ├── results.png ├── palm_emnlp.png ├── print_results.png └── palm_vs_palm_dagger.png ├── logs ├── results.sh ├── zeroshot │ ├── CREMA-D.json │ ├── ESC50.json │ ├── RAVDESS.json │ ├── SESA.json │ ├── TUT2017.json │ ├── ESC50-FOLD1.json │ ├── ESC50-FOLD2.json │ ├── ESC50-FOLD3.json │ ├── ESC50-FOLD4.json │ ├── ESC50-FOLD5.json │ ├── NS-Instruments.json │ ├── TUT2017-FOLD2.json │ ├── TUT2017-FOLD3.json │ ├── UrbanSound8K.json │ ├── VocalSound.json │ ├── Beijing-Opera-FOLD2.json │ ├── Beijing-Opera-FOLD3.json │ ├── Beijing-Opera-FOLD4.json │ ├── Beijing-Opera.json │ ├── ESC50-Actions-FOLD1.json │ ├── ESC50-Actions-FOLD3.json │ ├── ESC50-Actions.json │ ├── GT-Music-Genre.json │ ├── TUT2017-FOLD1.json │ ├── TUT2017-FOLD4.json │ ├── UrbanSound8K-FOLD2.json │ ├── UrbanSound8K-FOLD3.json │ ├── UrbanSound8K-FOLD4.json │ ├── UrbanSound8K-FOLD6.json │ ├── UrbanSound8K-FOLD8.json │ ├── Beijing-Opera-FOLD1.json │ ├── Beijing-Opera-FOLD5.json │ ├── ESC50-Actions-FOLD2.json │ ├── ESC50-Actions-FOLD4.json │ ├── ESC50-Actions-FOLD5.json │ ├── UrbanSound8K-FOLD1.json │ ├── UrbanSound8K-FOLD10.json │ ├── UrbanSound8K-FOLD5.json │ ├── UrbanSound8K-FOLD7.json │ ├── UrbanSound8K-FOLD9.json │ ├── accuracy.json │ ├── f1_score.json │ ├── SESA-SEED0.log │ ├── ESC50-Actions-FOLD1-SEED0.log │ ├── ESC50-Actions-FOLD2-SEED0.log │ ├── ESC50-Actions-FOLD3-SEED0.log │ ├── ESC50-Actions-FOLD5-SEED0.log │ ├── Beijing-Opera-FOLD1-SEED0.log │ ├── Beijing-Opera-FOLD2-SEED0.log │ ├── Beijing-Opera-FOLD3-SEED0.log │ ├── Beijing-Opera-FOLD4-SEED0.log │ ├── Beijing-Opera-FOLD5-SEED0.log │ ├── UrbanSound8K-FOLD1-SEED0.log │ ├── UrbanSound8K-FOLD10-SEED0.log │ ├── UrbanSound8K-FOLD2-SEED0.log │ └── UrbanSound8K-FOLD5-SEED0.log ├── palm │ ├── ESC50-Actions-FOLD3.json │ ├── ESC50.json │ ├── ESC50-Actions.json │ ├── ESC50-FOLD1.json │ ├── SESA.json │ ├── Beijing-Opera-FOLD1.json │ ├── Beijing-Opera-FOLD2.json │ ├── Beijing-Opera-FOLD3.json │ ├── Beijing-Opera-FOLD4.json │ 
├── CREMA-D.json │ ├── ESC50-FOLD2.json │ ├── ESC50-FOLD3.json │ ├── ESC50-FOLD4.json │ ├── ESC50-FOLD5.json │ ├── RAVDESS.json │ ├── TUT2017.json │ ├── VocalSound.json │ ├── Beijing-Opera.json │ ├── ESC50-Actions-FOLD1.json │ ├── ESC50-Actions-FOLD4.json │ ├── GT-Music-Genre.json │ ├── NS-Instruments.json │ ├── TUT2017-FOLD1.json │ ├── TUT2017-FOLD2.json │ ├── TUT2017-FOLD3.json │ ├── TUT2017-FOLD4.json │ ├── UrbanSound8K.json │ ├── ESC50-Actions-FOLD2.json │ ├── ESC50-Actions-FOLD5.json │ ├── UrbanSound8K-FOLD1.json │ ├── UrbanSound8K-FOLD10.json │ ├── UrbanSound8K-FOLD2.json │ ├── UrbanSound8K-FOLD3.json │ ├── UrbanSound8K-FOLD4.json │ ├── UrbanSound8K-FOLD5.json │ ├── UrbanSound8K-FOLD6.json │ ├── UrbanSound8K-FOLD7.json │ ├── UrbanSound8K-FOLD8.json │ ├── UrbanSound8K-FOLD9.json │ ├── Beijing-Opera-FOLD5.json │ ├── accuracy.json │ └── f1_score.json ├── cocoop │ ├── Beijing-Opera-FOLD2.json │ ├── ESC50.json │ ├── Beijing-Opera-FOLD1.json │ ├── Beijing-Opera-FOLD4.json │ ├── Beijing-Opera-FOLD5.json │ ├── ESC50-Actions-FOLD3.json │ ├── ESC50-Actions.json │ ├── ESC50-FOLD2.json │ ├── ESC50-FOLD3.json │ ├── ESC50-FOLD4.json │ ├── ESC50-FOLD5.json │ ├── SESA.json │ ├── Beijing-Opera.json │ ├── CREMA-D.json │ ├── ESC50-Actions-FOLD5.json │ ├── ESC50-FOLD1.json │ ├── GT-Music-Genre.json │ ├── RAVDESS.json │ ├── TUT2017-FOLD1.json │ ├── TUT2017-FOLD3.json │ ├── TUT2017-FOLD4.json │ ├── TUT2017.json │ ├── UrbanSound8K.json │ ├── VocalSound.json │ ├── ESC50-Actions-FOLD1.json │ ├── ESC50-Actions-FOLD2.json │ ├── ESC50-Actions-FOLD4.json │ ├── NS-Instruments.json │ ├── TUT2017-FOLD2.json │ ├── UrbanSound8K-FOLD10.json │ ├── UrbanSound8K-FOLD9.json │ ├── Beijing-Opera-FOLD3.json │ ├── UrbanSound8K-FOLD1.json │ ├── UrbanSound8K-FOLD2.json │ ├── UrbanSound8K-FOLD3.json │ ├── UrbanSound8K-FOLD4.json │ ├── UrbanSound8K-FOLD5.json │ ├── UrbanSound8K-FOLD6.json │ ├── UrbanSound8K-FOLD7.json │ ├── UrbanSound8K-FOLD8.json │ ├── accuracy.json │ └── f1_score.json ├── coop │ ├── 
ESC50-Actions-FOLD3.json │ ├── ESC50-Actions-FOLD4.json │ ├── ESC50-FOLD4.json │ ├── ESC50.json │ ├── SESA.json │ ├── Beijing-Opera-FOLD4.json │ ├── CREMA-D.json │ ├── ESC50-Actions.json │ ├── ESC50-FOLD1.json │ ├── ESC50-FOLD2.json │ ├── ESC50-FOLD3.json │ ├── ESC50-FOLD5.json │ ├── RAVDESS.json │ ├── TUT2017.json │ ├── VocalSound.json │ ├── Beijing-Opera.json │ ├── ESC50-Actions-FOLD2.json │ ├── ESC50-Actions-FOLD5.json │ ├── GT-Music-Genre.json │ ├── TUT2017-FOLD1.json │ ├── TUT2017-FOLD2.json │ ├── TUT2017-FOLD3.json │ ├── TUT2017-FOLD4.json │ ├── UrbanSound8K.json │ ├── Beijing-Opera-FOLD1.json │ ├── Beijing-Opera-FOLD2.json │ ├── Beijing-Opera-FOLD3.json │ ├── ESC50-Actions-FOLD1.json │ ├── NS-Instruments.json │ ├── UrbanSound8K-FOLD1.json │ ├── UrbanSound8K-FOLD10.json │ ├── UrbanSound8K-FOLD2.json │ ├── UrbanSound8K-FOLD3.json │ ├── UrbanSound8K-FOLD4.json │ ├── UrbanSound8K-FOLD5.json │ ├── UrbanSound8K-FOLD6.json │ ├── UrbanSound8K-FOLD7.json │ ├── UrbanSound8K-FOLD8.json │ ├── UrbanSound8K-FOLD9.json │ ├── Beijing-Opera-FOLD5.json │ ├── accuracy.json │ └── f1_score.json ├── process_results.py └── print_results.py ├── pengi ├── models │ ├── __init__.py │ └── audio.py ├── __init__.py └── configs │ ├── base.yml │ └── base_no_text_enc.yml ├── palm ├── __init__.py ├── zeroshot.py └── palm.py ├── scripts ├── run_all_datasets_coop.sh ├── run_all_datasets_palm.sh ├── run_all_datasets_cocoop.sh ├── run_all_datasets_zeroshot.sh ├── run_all_datasets_all_methods.sh ├── sesa.sh ├── crema_d.sh ├── ravdess.sh ├── vocal_sound.sh ├── gt_music_genre.sh ├── ns_instruments.sh ├── esc50.sh ├── tut.sh ├── beijing_opera.sh ├── esc50_actions.sh └── urban_sound.sh ├── requirements.txt ├── LICENSE ├── main.py └── utils ├── trainer.py └── dataset.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__/ 2 | pengi/configs/base.pth -------------------------------------------------------------------------------- 
/media/palm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asif-hanif/palm/HEAD/media/palm.png -------------------------------------------------------------------------------- /media/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asif-hanif/palm/HEAD/media/results.png -------------------------------------------------------------------------------- /media/palm_emnlp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asif-hanif/palm/HEAD/media/palm_emnlp.png -------------------------------------------------------------------------------- /media/print_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asif-hanif/palm/HEAD/media/print_results.png -------------------------------------------------------------------------------- /logs/results.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python process_folds.py 3 | python process_results.py 4 | python print_results.py -------------------------------------------------------------------------------- /media/palm_vs_palm_dagger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asif-hanif/palm/HEAD/media/palm_vs_palm_dagger.png -------------------------------------------------------------------------------- /pengi/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | # import models.audio 4 | # import models.config 5 | # import models.decoder 6 | # import models.pengi -------------------------------------------------------------------------------- /palm/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .zeroshot import ZeroShotPENGI as ZeroShot 2 | from .coop import CustomPENGI as COOP 3 | from .cocoop import CustomPENGI as COCOOP 4 | from .palm import CustomPENGI as PALM 5 | 6 | -------------------------------------------------------------------------------- /logs/zeroshot/CREMA-D.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.231, 4 | "f1_score": 0.172, 5 | "precision": 0.2026, 6 | "recall": 0.2609, 7 | "avg_loss": 2.883, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/ESC50.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.4965, 4 | "f1_score": 0.4478, 5 | "precision": 0.5376, 6 | "recall": 0.4965, 7 | "avg_loss": 1.8702, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/RAVDESS.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.1222, 4 | "f1_score": 0.0771, 5 | "precision": 0.2227, 6 | "recall": 0.1432, 7 | "avg_loss": 3.9791, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/SESA.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7238, 4 | "f1_score": 0.6827, 5 | "precision": 0.6941, 6 | "recall": 0.7508, 7 | "avg_loss": 1.3722, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/TUT2017.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.2435, 4 | "f1_score": 0.1795, 5 | "precision": 0.2958, 6 | 
"recall": 0.2434, 7 | "avg_loss": 3.5088, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.4975, 4 | "f1_score": 0.4464, 5 | "precision": 0.5487, 6 | "recall": 0.4975, 7 | "avg_loss": 1.7492, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.4825, 4 | "f1_score": 0.4288, 5 | "precision": 0.5242, 6 | "recall": 0.4825, 7 | "avg_loss": 2.0177, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.495, 4 | "f1_score": 0.4421, 5 | "precision": 0.5346, 6 | "recall": 0.495, 7 | "avg_loss": 1.8511, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.4925, 4 | "f1_score": 0.4422, 5 | "precision": 0.5054, 6 | "recall": 0.4925, 7 | "avg_loss": 1.7776, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.515, 4 | "f1_score": 0.4794, 5 | "precision": 0.575, 6 | "recall": 0.515, 7 | "avg_loss": 1.9556, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- 
/logs/zeroshot/NS-Instruments.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.3291, 4 | "f1_score": 0.26, 5 | "precision": 0.308, 6 | "recall": 0.2962, 7 | "avg_loss": 2.8539, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/TUT2017-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.2401, 4 | "f1_score": 0.174, 5 | "precision": 0.3072, 6 | "recall": 0.24, 7 | "avg_loss": 3.5171, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/TUT2017-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.2429, 4 | "f1_score": 0.1797, 5 | "precision": 0.326, 6 | "recall": 0.2428, 7 | "avg_loss": 3.431, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.5349, 4 | "f1_score": 0.5144, 5 | "precision": 0.5742, 6 | "recall": 0.5464, 7 | "avg_loss": 1.7085, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/VocalSound.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.4197, 4 | "f1_score": 0.3834, 5 | "precision": 0.4974, 6 | "recall": 0.4195, 7 | "avg_loss": 1.7859, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/Beijing-Opera-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.2766, 4 | 
"f1_score": 0.1083, 5 | "precision": 0.0691, 6 | "recall": 0.25, 7 | "avg_loss": 3.7753, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/Beijing-Opera-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.2766, 4 | "f1_score": 0.1083, 5 | "precision": 0.0691, 6 | "recall": 0.25, 7 | "avg_loss": 3.8906, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/Beijing-Opera-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.2766, 4 | "f1_score": 0.1083, 5 | "precision": 0.0691, 6 | "recall": 0.25, 7 | "avg_loss": 3.9215, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/Beijing-Opera.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.2881, 4 | "f1_score": 0.1321, 5 | "precision": 0.1697, 6 | "recall": 0.2633, 7 | "avg_loss": 3.8613, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-Actions-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7, 4 | "f1_score": 0.6518, 5 | "precision": 0.7361, 6 | "recall": 0.7, 7 | "avg_loss": 1.0603, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-Actions-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.675, 4 | "f1_score": 0.6251, 5 | "precision": 0.6599, 6 | "recall": 0.675, 7 | "avg_loss": 0.8852, 8 | "epoch": -1 9 | } 10 | } 
-------------------------------------------------------------------------------- /logs/zeroshot/ESC50-Actions.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6525, 4 | "f1_score": 0.6138, 5 | "precision": 0.6874, 6 | "recall": 0.6525, 7 | "avg_loss": 1.0578, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/GT-Music-Genre.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.325, 4 | "f1_score": 0.2807, 5 | "precision": 0.3092, 6 | "recall": 0.3406, 7 | "avg_loss": 4.1713, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/TUT2017-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.2453, 4 | "f1_score": 0.1821, 5 | "precision": 0.2455, 6 | "recall": 0.2453, 7 | "avg_loss": 3.5917, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/TUT2017-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.2456, 4 | "f1_score": 0.1821, 5 | "precision": 0.3044, 6 | "recall": 0.2456, 7 | "avg_loss": 3.4954, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.545, 4 | "f1_score": 0.501, 5 | "precision": 0.5382, 6 | "recall": 0.5059, 7 | "avg_loss": 1.6292, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD3.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.467, 4 | "f1_score": 0.4633, 5 | "precision": 0.5381, 6 | "recall": 0.4956, 7 | "avg_loss": 2.0666, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.5677, 4 | "f1_score": 0.5618, 5 | "precision": 0.6388, 6 | "recall": 0.5754, 7 | "avg_loss": 1.345, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD6.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.5128, 4 | "f1_score": 0.502, 5 | "precision": 0.5619, 6 | "recall": 0.5428, 7 | "avg_loss": 1.9537, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD8.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.5261, 4 | "f1_score": 0.5193, 5 | "precision": 0.576, 6 | "recall": 0.5532, 7 | "avg_loss": 1.7248, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/Beijing-Opera-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.2917, 4 | "f1_score": 0.1468, 5 | "precision": 0.3191, 6 | "recall": 0.2708, 7 | "avg_loss": 4.0296, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/Beijing-Opera-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.3191, 4 | 
"f1_score": 0.189, 5 | "precision": 0.3222, 6 | "recall": 0.2955, 7 | "avg_loss": 3.6894, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-Actions-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6375, 4 | "f1_score": 0.5951, 5 | "precision": 0.7354, 6 | "recall": 0.6375, 7 | "avg_loss": 1.1233, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-Actions-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6125, 4 | "f1_score": 0.578, 5 | "precision": 0.6303, 6 | "recall": 0.6125, 7 | "avg_loss": 1.1387, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-Actions-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6375, 4 | "f1_score": 0.6191, 5 | "precision": 0.6751, 6 | "recall": 0.6375, 7 | "avg_loss": 1.0817, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.5487, 4 | "f1_score": 0.5236, 5 | "precision": 0.5844, 6 | "recall": 0.5538, 7 | "avg_loss": 1.6837, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD10.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.5078, 4 | "f1_score": 0.4733, 5 | "precision": 0.5256, 6 | "recall": 0.512, 7 | "avg_loss": 1.7276, 8 | "epoch": -1 9 | } 10 | } 
-------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6047, 4 | "f1_score": 0.5699, 5 | "precision": 0.6039, 6 | "recall": 0.6031, 7 | "avg_loss": 1.4961, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD7.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.5274, 4 | "f1_score": 0.5195, 5 | "precision": 0.6315, 6 | "recall": 0.5583, 7 | "avg_loss": 1.7977, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD9.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.5417, 4 | "f1_score": 0.5105, 5 | "precision": 0.5439, 6 | "recall": 0.5643, 7 | "avg_loss": 1.6609, 8 | "epoch": -1 9 | } 10 | } -------------------------------------------------------------------------------- /pengi/models/audio.py: -------------------------------------------------------------------------------- 1 | from .htsat import HTSATWrapper 2 | 3 | def get_audio_encoder(name: str): 4 | if name == "HTSAT": 5 | return HTSATWrapper, 768 6 | else: 7 | raise Exception('The audio encoder name {} is incorrect or not supported'.format(name)) -------------------------------------------------------------------------------- /pengi/__init__.py: -------------------------------------------------------------------------------- 1 | from .models.audio import get_audio_encoder 2 | from .models.pengi import Projection 3 | 4 | from .wrapper import PengiWrapper as Pengi 5 | 6 | pengi = Pengi(config="base") 7 | pengi.args.classes_num = None 8 | pengi.args.use_precomputed_melspec = False 9 | 
pengi.args.pretrained_audioencoder_path = None 10 | 11 | 12 | process_audio_fn = pengi.preprocess_audio 13 | -------------------------------------------------------------------------------- /scripts/run_all_datasets_coop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | bash scripts/beijing_opera.sh coop 3 | bash scripts/crema_d.sh coop 4 | bash scripts/esc50_actions.sh coop 5 | bash scripts/esc50.sh coop 6 | bash scripts/gt_music_genre.sh coop 7 | bash scripts/ns_instruments.sh coop 8 | bash scripts/ravdess.sh coop 9 | bash scripts/sesa.sh coop 10 | bash scripts/tut.sh coop 11 | bash scripts/urban_sound.sh coop 12 | bash scripts/vocal_sound.sh coop -------------------------------------------------------------------------------- /scripts/run_all_datasets_palm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | bash scripts/beijing_opera.sh palm 3 | bash scripts/crema_d.sh palm 4 | bash scripts/esc50_actions.sh palm 5 | bash scripts/esc50.sh palm 6 | bash scripts/gt_music_genre.sh palm 7 | bash scripts/ns_instruments.sh palm 8 | bash scripts/ravdess.sh palm 9 | bash scripts/sesa.sh palm 10 | bash scripts/tut.sh palm 11 | bash scripts/urban_sound.sh palm 12 | bash scripts/vocal_sound.sh palm -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # PyTorch 2 | --extra-index-url https://download.pytorch.org/whl/cu113 3 | torch==1.11.0+cu113 4 | torchvision==0.12.0+cu113 5 | torchaudio==0.11.0 6 | 7 | # Others 8 | numpy==1.23.0 9 | pandas==2.0.3 10 | matplotlib==3.6.3 11 | scikit-learn==1.2.0 12 | notebook==6.5.6 13 | tabulate==0.9.0 14 | 15 | torchlibrosa==0.1.0 16 | transformers==4.28.1 17 | PyYAML==6.0 18 | importlib_resources==5.12.0 19 | librosa==0.10.0.post2 20 | 
-------------------------------------------------------------------------------- /scripts/run_all_datasets_cocoop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | bash scripts/beijing_opera.sh cocoop 3 | bash scripts/crema_d.sh cocoop 4 | bash scripts/esc50_actions.sh cocoop 5 | bash scripts/esc50.sh cocoop 6 | bash scripts/gt_music_genre.sh cocoop 7 | bash scripts/ns_instruments.sh cocoop 8 | bash scripts/ravdess.sh cocoop 9 | bash scripts/sesa.sh cocoop 10 | bash scripts/tut.sh cocoop 11 | bash scripts/urban_sound.sh cocoop 12 | bash scripts/vocal_sound.sh cocoop -------------------------------------------------------------------------------- /scripts/run_all_datasets_zeroshot.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | bash scripts/beijing_opera.sh zeroshot 3 | bash scripts/crema_d.sh zeroshot 4 | bash scripts/esc50_actions.sh zeroshot 5 | bash scripts/esc50.sh zeroshot 6 | bash scripts/gt_music_genre.sh zeroshot 7 | bash scripts/ns_instruments.sh zeroshot 8 | bash scripts/ravdess.sh zeroshot 9 | bash scripts/sesa.sh zeroshot 10 | bash scripts/tut.sh zeroshot 11 | bash scripts/urban_sound.sh zeroshot 12 | bash scripts/vocal_sound.sh zeroshot -------------------------------------------------------------------------------- /logs/zeroshot/accuracy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Beijing-Opera": [ 3 | 0.2881 4 | ], 5 | "CREMA-D": [ 6 | 0.231 7 | ], 8 | "ESC50-Actions": [ 9 | 0.6525 10 | ], 11 | "ESC50": [ 12 | 0.4965 13 | ], 14 | "GT-Music-Genre": [ 15 | 0.325 16 | ], 17 | "NS-Instruments": [ 18 | 0.3291 19 | ], 20 | "RAVDESS": [ 21 | 0.1222 22 | ], 23 | "SESA": [ 24 | 0.7238 25 | ], 26 | "TUT2017": [ 27 | 0.2435 28 | ], 29 | "UrbanSound8K": [ 30 | 0.5349 31 | ], 32 | "VocalSound": [ 33 | 0.4197 34 | ] 35 | } 
-------------------------------------------------------------------------------- /logs/zeroshot/f1_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "Beijing-Opera": [ 3 | 0.1321 4 | ], 5 | "CREMA-D": [ 6 | 0.172 7 | ], 8 | "ESC50-Actions": [ 9 | 0.6138 10 | ], 11 | "ESC50": [ 12 | 0.4478 13 | ], 14 | "GT-Music-Genre": [ 15 | 0.2807 16 | ], 17 | "NS-Instruments": [ 18 | 0.26 19 | ], 20 | "RAVDESS": [ 21 | 0.0771 22 | ], 23 | "SESA": [ 24 | 0.6827 25 | ], 26 | "TUT2017": [ 27 | 0.1795 28 | ], 29 | "UrbanSound8K": [ 30 | 0.5144 31 | ], 32 | "VocalSound": [ 33 | 0.3834 34 | ] 35 | } -------------------------------------------------------------------------------- /logs/palm/ESC50-Actions-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 1.0, 4 | "f1_score": 1.0, 5 | "precision": 1.0, 6 | "recall": 1.0, 7 | "avg_loss": 0.0033, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9875, 12 | "f1_score": 0.9875, 13 | "precision": 0.9889, 14 | "recall": 0.9875, 15 | "avg_loss": 0.0224, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 1.0, 20 | "f1_score": 1.0, 21 | "precision": 1.0, 22 | "recall": 1.0, 23 | "avg_loss": 0.0, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/Beijing-Opera-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 1.0, 4 | "f1_score": 1.0, 5 | "precision": 1.0, 6 | "recall": 1.0, 7 | "avg_loss": 0.0379, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9787, 12 | "f1_score": 0.9799, 13 | "precision": 0.9821, 14 | "recall": 0.9792, 15 | "avg_loss": 0.0701, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 1.0, 20 | "f1_score": 1.0, 21 | "precision": 1.0, 22 | "recall": 1.0, 23 | "avg_loss": 0.0225, 24 | "epoch": 49 25 | } 26 | } 
-------------------------------------------------------------------------------- /logs/palm/ESC50.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.956, 4 | "f1_score": 0.9544, 5 | "precision": 0.9633, 6 | "recall": 0.956, 7 | "avg_loss": 0.299, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.96, 12 | "f1_score": 0.9591, 13 | "precision": 0.9649, 14 | "recall": 0.96, 15 | "avg_loss": 0.2285, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.962, 20 | "f1_score": 0.9614, 21 | "precision": 0.9659, 22 | "recall": 0.962, 23 | "avg_loss": 0.2537, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/ESC50.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.946, 4 | "f1_score": 0.9448, 5 | "precision": 0.9508, 6 | "recall": 0.946, 7 | "avg_loss": 0.1992, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.937, 12 | "f1_score": 0.9351, 13 | "precision": 0.9451, 14 | "recall": 0.937, 15 | "avg_loss": 0.2249, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.945, 20 | "f1_score": 0.9431, 21 | "precision": 0.9534, 22 | "recall": 0.945, 23 | "avg_loss": 0.2002, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/ESC50-Actions-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.975, 4 | "f1_score": 0.9739, 5 | "precision": 0.9778, 6 | "recall": 0.975, 7 | "avg_loss": 0.0883, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.95, 12 | "f1_score": 0.9496, 13 | "precision": 0.955, 14 | "recall": 0.95, 15 | "avg_loss": 0.1048, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 1.0, 20 | "f1_score": 1.0, 21 | "precision": 1.0, 22 | "recall": 1.0, 23 | 
"avg_loss": 0.0615, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/ESC50-Actions-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.975, 4 | "f1_score": 0.9739, 5 | "precision": 0.9778, 6 | "recall": 0.975, 7 | "avg_loss": 0.1317, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.975, 12 | "f1_score": 0.9749, 13 | "precision": 0.9778, 14 | "recall": 0.975, 15 | "avg_loss": 0.0766, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 1.0, 20 | "f1_score": 1.0, 21 | "precision": 1.0, 22 | "recall": 1.0, 23 | "avg_loss": 0.0551, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/ESC50-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9525, 4 | "f1_score": 0.9516, 5 | "precision": 0.9557, 6 | "recall": 0.9525, 7 | "avg_loss": 0.1851, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.95, 12 | "f1_score": 0.9488, 13 | "precision": 0.9553, 14 | "recall": 0.95, 15 | "avg_loss": 0.1796, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.945, 20 | "f1_score": 0.945, 21 | "precision": 0.9512, 22 | "recall": 0.945, 23 | "avg_loss": 0.223, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/ESC50.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.941, 4 | "f1_score": 0.9399, 5 | "precision": 0.9497, 6 | "recall": 0.941, 7 | "avg_loss": 0.2059, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.939, 12 | "f1_score": 0.9369, 13 | "precision": 0.9471, 14 | "recall": 0.939, 15 | "avg_loss": 0.2297, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9345, 20 | "f1_score": 0.9335, 
21 | "precision": 0.9437, 22 | "recall": 0.9345, 23 | "avg_loss": 0.2171, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/SESA.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9143, 4 | "f1_score": 0.9044, 5 | "precision": 0.8993, 6 | "recall": 0.9143, 7 | "avg_loss": 0.2474, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8952, 12 | "f1_score": 0.8959, 13 | "precision": 0.9002, 14 | "recall": 0.9008, 15 | "avg_loss": 0.3017, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8762, 20 | "f1_score": 0.8639, 21 | "precision": 0.865, 22 | "recall": 0.8696, 23 | "avg_loss": 0.342, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/ESC50-Actions.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.97, 4 | "f1_score": 0.9698, 5 | "precision": 0.9757, 6 | "recall": 0.97, 7 | "avg_loss": 0.2604, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9625, 12 | "f1_score": 0.962, 13 | "precision": 0.9677, 14 | "recall": 0.9625, 15 | "avg_loss": 0.2, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.965, 20 | "f1_score": 0.9644, 21 | "precision": 0.9689, 22 | "recall": 0.965, 23 | "avg_loss": 0.2197, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/ESC50-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.965, 4 | "f1_score": 0.9633, 5 | "precision": 0.9685, 6 | "recall": 0.965, 7 | "avg_loss": 0.2119, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.965, 12 | "f1_score": 0.9637, 13 | "precision": 0.9675, 14 | "recall": 0.965, 15 | "avg_loss": 0.2543, 16 | "epoch": 49 17 | }, 18 | 
"seed_2": { 19 | "accuracy": 0.96, 20 | "f1_score": 0.9588, 21 | "precision": 0.9634, 22 | "recall": 0.96, 23 | "avg_loss": 0.2516, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/SESA.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8857, 4 | "f1_score": 0.8955, 5 | "precision": 0.9062, 6 | "recall": 0.8907, 7 | "avg_loss": 0.5661, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9143, 12 | "f1_score": 0.9197, 13 | "precision": 0.9273, 14 | "recall": 0.9213, 15 | "avg_loss": 0.4241, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8857, 20 | "f1_score": 0.8805, 21 | "precision": 0.8909, 22 | "recall": 0.894, 23 | "avg_loss": 2.0808, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/Beijing-Opera-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9375, 4 | "f1_score": 0.9348, 5 | "precision": 0.9423, 6 | "recall": 0.9423, 7 | "avg_loss": 0.1414, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 1.0, 12 | "f1_score": 1.0, 13 | "precision": 1.0, 14 | "recall": 1.0, 15 | "avg_loss": 0.0241, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9792, 20 | "f1_score": 0.9776, 21 | "precision": 0.9821, 22 | "recall": 0.975, 23 | "avg_loss": 0.0527, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/Beijing-Opera-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9787, 4 | "f1_score": 0.9768, 5 | "precision": 0.9808, 6 | "recall": 0.975, 7 | "avg_loss": 0.0794, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9787, 12 | "f1_score": 0.9772, 13 | "precision": 0.9773, 14 | 
"recall": 0.9792, 15 | "avg_loss": 0.0862, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 1.0, 20 | "f1_score": 1.0, 21 | "precision": 1.0, 22 | "recall": 1.0, 23 | "avg_loss": 0.0596, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/Beijing-Opera-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9149, 4 | "f1_score": 0.9099, 5 | "precision": 0.925, 6 | "recall": 0.9126, 7 | "avg_loss": 0.2799, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9787, 12 | "f1_score": 0.9776, 13 | "precision": 0.9821, 14 | "recall": 0.975, 15 | "avg_loss": 0.0502, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 1.0, 20 | "f1_score": 1.0, 21 | "precision": 1.0, 22 | "recall": 1.0, 23 | "avg_loss": 0.0481, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/ESC50-Actions-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9875, 4 | "f1_score": 0.9875, 5 | "precision": 0.9889, 6 | "recall": 0.9875, 7 | "avg_loss": 0.0394, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.975, 12 | "f1_score": 0.9749, 13 | "precision": 0.9778, 14 | "recall": 0.975, 15 | "avg_loss": 0.0585, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 1.0, 20 | "f1_score": 1.0, 21 | "precision": 1.0, 22 | "recall": 1.0, 23 | "avg_loss": 0.0358, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/ESC50-Actions.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.97, 4 | "f1_score": 0.9703, 5 | "precision": 0.9758, 6 | "recall": 0.97, 7 | "avg_loss": 0.0762, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 
0.9625, 12 | "f1_score": 0.9621, 13 | "precision": 0.9681, 14 | "recall": 0.9625, 15 | "avg_loss": 0.085, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.965, 20 | "f1_score": 0.9648, 21 | "precision": 0.971, 22 | "recall": 0.965, 23 | "avg_loss": 0.1053, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/ESC50-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9575, 4 | "f1_score": 0.9572, 5 | "precision": 0.9623, 6 | "recall": 0.9575, 7 | "avg_loss": 0.1299, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.93, 12 | "f1_score": 0.9273, 13 | "precision": 0.9398, 14 | "recall": 0.93, 15 | "avg_loss": 0.3048, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.965, 20 | "f1_score": 0.965, 21 | "precision": 0.9685, 22 | "recall": 0.965, 23 | "avg_loss": 0.1443, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/ESC50-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.92, 4 | "f1_score": 0.9177, 5 | "precision": 0.9295, 6 | "recall": 0.92, 7 | "avg_loss": 0.2466, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.915, 12 | "f1_score": 0.9111, 13 | "precision": 0.9263, 14 | "recall": 0.915, 15 | "avg_loss": 0.2212, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9325, 20 | "f1_score": 0.9271, 21 | "precision": 0.9431, 22 | "recall": 0.9325, 23 | "avg_loss": 0.2297, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/ESC50-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.965, 4 | "f1_score": 0.9644, 5 | "precision": 0.9662, 6 | "recall": 0.965, 7 | 
"avg_loss": 0.129, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9575, 12 | "f1_score": 0.9568, 13 | "precision": 0.963, 14 | "recall": 0.9575, 15 | "avg_loss": 0.1317, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.945, 20 | "f1_score": 0.9452, 21 | "precision": 0.9569, 22 | "recall": 0.945, 23 | "avg_loss": 0.1732, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/ESC50-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.935, 4 | "f1_score": 0.9335, 5 | "precision": 0.9398, 6 | "recall": 0.935, 7 | "avg_loss": 0.3011, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9375, 12 | "f1_score": 0.9368, 13 | "precision": 0.9464, 14 | "recall": 0.9375, 15 | "avg_loss": 0.2632, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.93, 20 | "f1_score": 0.9269, 21 | "precision": 0.94, 22 | "recall": 0.93, 23 | "avg_loss": 0.2595, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/SESA.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8381, 4 | "f1_score": 0.8425, 5 | "precision": 0.8542, 6 | "recall": 0.848, 7 | "avg_loss": 0.3378, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8762, 12 | "f1_score": 0.8821, 13 | "precision": 0.8908, 14 | "recall": 0.8788, 15 | "avg_loss": 0.2775, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8952, 20 | "f1_score": 0.8847, 21 | "precision": 0.8851, 22 | "recall": 0.8949, 23 | "avg_loss": 0.3547, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/Beijing-Opera-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 1.0, 4 | 
"f1_score": 1.0, 5 | "precision": 1.0, 6 | "recall": 1.0, 7 | "avg_loss": 0.0937, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9787, 12 | "f1_score": 0.9772, 13 | "precision": 0.9773, 14 | "recall": 0.9792, 15 | "avg_loss": 0.1427, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9574, 20 | "f1_score": 0.9545, 21 | "precision": 0.9583, 22 | "recall": 0.9583, 23 | "avg_loss": 0.1051, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/CREMA-D.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.313, 4 | "f1_score": 0.2609, 5 | "precision": 0.2909, 6 | "recall": 0.3439, 7 | "avg_loss": 1.7938, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.4197, 12 | "f1_score": 0.2745, 13 | "precision": 0.2817, 14 | "recall": 0.3173, 15 | "avg_loss": 1.5727, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.276, 20 | "f1_score": 0.2389, 21 | "precision": 0.2947, 22 | "recall": 0.3498, 23 | "avg_loss": 1.5965, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/ESC50-Actions.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9625, 4 | "f1_score": 0.9618, 5 | "precision": 0.9667, 6 | "recall": 0.9625, 7 | "avg_loss": 0.1252, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.94, 12 | "f1_score": 0.939, 13 | "precision": 0.9465, 14 | "recall": 0.94, 15 | "avg_loss": 0.1606, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.955, 20 | "f1_score": 0.9532, 21 | "precision": 0.9635, 22 | "recall": 0.955, 23 | "avg_loss": 0.1533, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/ESC50-FOLD1.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9225, 4 | "f1_score": 0.9209, 5 | "precision": 0.94, 6 | "recall": 0.9225, 7 | "avg_loss": 0.2018, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9425, 12 | "f1_score": 0.9405, 13 | "precision": 0.9487, 14 | "recall": 0.9425, 15 | "avg_loss": 0.1728, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.92, 20 | "f1_score": 0.9174, 21 | "precision": 0.9336, 22 | "recall": 0.92, 23 | "avg_loss": 0.1842, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/ESC50-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9525, 4 | "f1_score": 0.9521, 5 | "precision": 0.9594, 6 | "recall": 0.9525, 7 | "avg_loss": 0.1646, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.95, 12 | "f1_score": 0.9487, 13 | "precision": 0.9573, 14 | "recall": 0.95, 15 | "avg_loss": 0.222, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9475, 20 | "f1_score": 0.946, 21 | "precision": 0.956, 22 | "recall": 0.9475, 23 | "avg_loss": 0.1604, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/ESC50-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.945, 4 | "f1_score": 0.9432, 5 | "precision": 0.9529, 6 | "recall": 0.945, 7 | "avg_loss": 0.1947, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9225, 12 | "f1_score": 0.9172, 13 | "precision": 0.9352, 14 | "recall": 0.9225, 15 | "avg_loss": 0.2257, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9325, 20 | "f1_score": 0.9313, 21 | "precision": 0.9392, 22 | "recall": 0.9325, 23 | "avg_loss": 0.205, 24 | "epoch": 49 25 | } 26 | } 
-------------------------------------------------------------------------------- /logs/coop/ESC50-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9325, 4 | "f1_score": 0.9316, 5 | "precision": 0.9403, 6 | "recall": 0.9325, 7 | "avg_loss": 0.2835, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.93, 12 | "f1_score": 0.9293, 13 | "precision": 0.9389, 14 | "recall": 0.93, 15 | "avg_loss": 0.3484, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9275, 20 | "f1_score": 0.9278, 21 | "precision": 0.9386, 22 | "recall": 0.9275, 23 | "avg_loss": 0.3128, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/RAVDESS.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.3849, 4 | "f1_score": 0.3668, 5 | "precision": 0.4042, 6 | "recall": 0.4284, 7 | "avg_loss": 1.6312, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.2688, 12 | "f1_score": 0.2385, 13 | "precision": 0.2657, 14 | "recall": 0.2744, 15 | "avg_loss": 1.9451, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.3422, 20 | "f1_score": 0.3075, 21 | "precision": 0.3023, 22 | "recall": 0.3566, 23 | "avg_loss": 1.6283, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/TUT2017.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6391, 4 | "f1_score": 0.6315, 5 | "precision": 0.6524, 6 | "recall": 0.6391, 7 | "avg_loss": 1.0158, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.6667, 12 | "f1_score": 0.6596, 13 | "precision": 0.6774, 14 | "recall": 0.6667, 15 | "avg_loss": 0.9857, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.6525, 20 | "f1_score": 0.6409, 21 | "precision": 0.6573, 22 | 
"recall": 0.6525, 23 | "avg_loss": 1.0111, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/VocalSound.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7162, 4 | "f1_score": 0.6953, 5 | "precision": 0.7226, 6 | "recall": 0.7163, 7 | "avg_loss": 0.8699, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7485, 12 | "f1_score": 0.7481, 13 | "precision": 0.7556, 14 | "recall": 0.7486, 15 | "avg_loss": 0.788, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.6642, 20 | "f1_score": 0.6606, 21 | "precision": 0.681, 22 | "recall": 0.6642, 23 | "avg_loss": 0.978, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/Beijing-Opera-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9375, 4 | "f1_score": 0.9348, 5 | "precision": 0.9423, 6 | "recall": 0.9423, 7 | "avg_loss": 1.5655, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9583, 12 | "f1_score": 0.9594, 13 | "precision": 0.9667, 14 | "recall": 0.9583, 15 | "avg_loss": 0.1225, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 1.0, 20 | "f1_score": 1.0, 21 | "precision": 1.0, 22 | "recall": 1.0, 23 | "avg_loss": 0.0007, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/Beijing-Opera-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 1.0, 4 | "f1_score": 1.0, 5 | "precision": 1.0, 6 | "recall": 1.0, 7 | "avg_loss": 0.0116, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9149, 12 | "f1_score": 0.9132, 13 | "precision": 0.9201, 14 | "recall": 0.9173, 15 | "avg_loss": 0.6439, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | 
"accuracy": 0.9574, 20 | "f1_score": 0.9599, 21 | "precision": 0.9643, 22 | "recall": 0.9615, 23 | "avg_loss": 0.1688, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/Beijing-Opera-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9787, 4 | "f1_score": 0.9772, 5 | "precision": 0.9773, 6 | "recall": 0.9792, 7 | "avg_loss": 0.1265, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9362, 12 | "f1_score": 0.9331, 13 | "precision": 0.9344, 14 | "recall": 0.9333, 15 | "avg_loss": 0.1467, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 1.0, 20 | "f1_score": 1.0, 21 | "precision": 1.0, 22 | "recall": 1.0, 23 | "avg_loss": 0.0, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/Beijing-Opera-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 1.0, 4 | "f1_score": 1.0, 5 | "precision": 1.0, 6 | "recall": 1.0, 7 | "avg_loss": 0.0015, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8936, 12 | "f1_score": 0.8882, 13 | "precision": 0.9107, 14 | "recall": 0.8958, 15 | "avg_loss": 0.411, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9787, 20 | "f1_score": 0.9772, 21 | "precision": 0.9773, 22 | "recall": 0.9792, 23 | "avg_loss": 0.2133, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/CREMA-D.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.4453, 4 | "f1_score": 0.3277, 5 | "precision": 0.334, 6 | "recall": 0.3604, 7 | "avg_loss": 2.9509, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.358, 12 | "f1_score": 0.3083, 13 | "precision": 0.3326, 14 | "recall": 0.3725, 15 | 
"avg_loss": 3.1415, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.2344, 20 | "f1_score": 0.199, 21 | "precision": 0.2792, 22 | "recall": 0.2948, 23 | "avg_loss": 2.7708, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/ESC50-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.965, 4 | "f1_score": 0.9648, 5 | "precision": 0.9692, 6 | "recall": 0.965, 7 | "avg_loss": 0.2024, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.97, 12 | "f1_score": 0.9697, 13 | "precision": 0.9729, 14 | "recall": 0.97, 15 | "avg_loss": 0.1548, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9725, 20 | "f1_score": 0.9722, 21 | "precision": 0.9757, 22 | "recall": 0.9725, 23 | "avg_loss": 0.1136, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/ESC50-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.95, 4 | "f1_score": 0.9497, 5 | "precision": 0.9625, 6 | "recall": 0.95, 7 | "avg_loss": 0.3991, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.955, 12 | "f1_score": 0.9544, 13 | "precision": 0.9594, 14 | "recall": 0.955, 15 | "avg_loss": 0.2121, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9475, 20 | "f1_score": 0.9481, 21 | "precision": 0.9528, 22 | "recall": 0.9475, 23 | "avg_loss": 0.3287, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/ESC50-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9675, 4 | "f1_score": 0.967, 5 | "precision": 0.9729, 6 | "recall": 0.9675, 7 | "avg_loss": 0.1743, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.975, 12 | 
"f1_score": 0.9742, 13 | "precision": 0.9782, 14 | "recall": 0.975, 15 | "avg_loss": 0.1198, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9675, 20 | "f1_score": 0.9667, 21 | "precision": 0.9701, 22 | "recall": 0.9675, 23 | "avg_loss": 0.3094, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/ESC50-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9325, 4 | "f1_score": 0.9274, 5 | "precision": 0.9435, 6 | "recall": 0.9325, 7 | "avg_loss": 0.5073, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.935, 12 | "f1_score": 0.9333, 13 | "precision": 0.9463, 14 | "recall": 0.935, 15 | "avg_loss": 0.4013, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9625, 20 | "f1_score": 0.9614, 21 | "precision": 0.9673, 22 | "recall": 0.9625, 23 | "avg_loss": 0.265, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/RAVDESS.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.4562, 4 | "f1_score": 0.4486, 5 | "precision": 0.4651, 6 | "recall": 0.4848, 7 | "avg_loss": 2.4994, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.4603, 12 | "f1_score": 0.4718, 13 | "precision": 0.4628, 14 | "recall": 0.4905, 15 | "avg_loss": 2.6958, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.4623, 20 | "f1_score": 0.4435, 21 | "precision": 0.4707, 22 | "recall": 0.4439, 23 | "avg_loss": 2.505, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/TUT2017.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7959, 4 | "f1_score": 0.7942, 5 | "precision": 0.8073, 6 | "recall": 0.7959, 7 | "avg_loss": 0.9918, 8 | 
"epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8047, 12 | "f1_score": 0.8024, 13 | "precision": 0.809, 14 | "recall": 0.8047, 15 | "avg_loss": 0.9609, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7729, 20 | "f1_score": 0.772, 21 | "precision": 0.7846, 22 | "recall": 0.7729, 23 | "avg_loss": 1.0951, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/VocalSound.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8101, 4 | "f1_score": 0.8107, 5 | "precision": 0.8209, 6 | "recall": 0.81, 7 | "avg_loss": 1.5045, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8168, 12 | "f1_score": 0.8179, 13 | "precision": 0.8215, 14 | "recall": 0.8168, 15 | "avg_loss": 1.3647, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7964, 20 | "f1_score": 0.7972, 21 | "precision": 0.8021, 22 | "recall": 0.7964, 23 | "avg_loss": 1.4756, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/Beijing-Opera.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9577, 4 | "f1_score": 0.9556, 5 | "precision": 0.9613, 6 | "recall": 0.958, 7 | "avg_loss": 0.1251, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.983, 12 | "f1_score": 0.9824, 13 | "precision": 0.9838, 14 | "recall": 0.9825, 15 | "avg_loss": 0.054, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9916, 20 | "f1_score": 0.991, 21 | "precision": 0.9919, 22 | "recall": 0.9908, 23 | "avg_loss": 0.0446, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/CREMA-D.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.2539, 4 | "f1_score": 0.2267, 5 
| "precision": 0.2911, 6 | "recall": 0.3371, 7 | "avg_loss": 1.7138, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.3358, 12 | "f1_score": 0.2681, 13 | "precision": 0.3493, 14 | "recall": 0.3263, 15 | "avg_loss": 1.6171, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.3156, 20 | "f1_score": 0.2851, 21 | "precision": 0.3206, 22 | "recall": 0.3717, 23 | "avg_loss": 1.5863, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/ESC50-Actions-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9625, 4 | "f1_score": 0.9633, 5 | "precision": 0.9727, 6 | "recall": 0.9625, 7 | "avg_loss": 0.1345, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.925, 12 | "f1_score": 0.9235, 13 | "precision": 0.938, 14 | "recall": 0.925, 15 | "avg_loss": 0.1394, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.95, 20 | "f1_score": 0.9495, 21 | "precision": 0.9578, 22 | "recall": 0.95, 23 | "avg_loss": 0.138, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/ESC50-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9525, 4 | "f1_score": 0.9513, 5 | "precision": 0.9563, 6 | "recall": 0.9525, 7 | "avg_loss": 0.1892, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.945, 12 | "f1_score": 0.9437, 13 | "precision": 0.95, 14 | "recall": 0.945, 15 | "avg_loss": 0.2038, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9525, 20 | "f1_score": 0.9511, 21 | "precision": 0.9586, 22 | "recall": 0.9525, 23 | "avg_loss": 0.1943, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/GT-Music-Genre.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.75, 4 | "f1_score": 0.7383, 5 | "precision": 0.7553, 6 | "recall": 0.7659, 7 | "avg_loss": 0.7872, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.745, 12 | "f1_score": 0.7417, 13 | "precision": 0.7539, 14 | "recall": 0.7565, 15 | "avg_loss": 0.8638, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.76, 20 | "f1_score": 0.7623, 21 | "precision": 0.7691, 22 | "recall": 0.7676, 23 | "avg_loss": 0.8198, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/RAVDESS.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.3727, 4 | "f1_score": 0.3634, 5 | "precision": 0.4068, 6 | "recall": 0.4308, 7 | "avg_loss": 1.7129, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.4399, 12 | "f1_score": 0.4351, 13 | "precision": 0.4518, 14 | "recall": 0.4742, 15 | "avg_loss": 1.5401, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.3523, 20 | "f1_score": 0.3436, 21 | "precision": 0.3837, 22 | "recall": 0.3857, 23 | "avg_loss": 1.6625, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/TUT2017-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.653, 4 | "f1_score": 0.6472, 5 | "precision": 0.6664, 6 | "recall": 0.653, 7 | "avg_loss": 1.0301, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.6692, 12 | "f1_score": 0.6707, 13 | "precision": 0.6866, 14 | "recall": 0.6692, 15 | "avg_loss": 1.0415, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.6462, 20 | "f1_score": 0.6388, 21 | "precision": 0.6532, 22 | "recall": 0.6462, 23 | "avg_loss": 0.9797, 24 | "epoch": 49 25 | } 26 | } 
-------------------------------------------------------------------------------- /logs/cocoop/TUT2017-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7938, 4 | "f1_score": 0.7925, 5 | "precision": 0.81, 6 | "recall": 0.7939, 7 | "avg_loss": 0.6155, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7405, 12 | "f1_score": 0.74, 13 | "precision": 0.7625, 14 | "recall": 0.7406, 15 | "avg_loss": 0.7379, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7856, 20 | "f1_score": 0.7813, 21 | "precision": 0.8007, 22 | "recall": 0.7856, 23 | "avg_loss": 0.6484, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/TUT2017-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7732, 4 | "f1_score": 0.772, 5 | "precision": 0.7881, 6 | "recall": 0.7732, 7 | "avg_loss": 0.6548, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7547, 12 | "f1_score": 0.749, 13 | "precision": 0.7656, 14 | "recall": 0.7547, 15 | "avg_loss": 0.7676, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7436, 20 | "f1_score": 0.7355, 21 | "precision": 0.7566, 22 | "recall": 0.7436, 23 | "avg_loss": 0.811, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/TUT2017.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7499, 4 | "f1_score": 0.7474, 5 | "precision": 0.7631, 6 | "recall": 0.7499, 7 | "avg_loss": 0.7294, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7215, 12 | "f1_score": 0.7198, 13 | "precision": 0.7426, 14 | "recall": 0.7216, 15 | "avg_loss": 0.8406, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7312, 20 | "f1_score": 0.7246, 21 | "precision": 0.7438, 
22 | "recall": 0.7313, 23 | "avg_loss": 0.7826, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/UrbanSound8K.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7576, 4 | "f1_score": 0.7581, 5 | "precision": 0.7739, 6 | "recall": 0.774, 7 | "avg_loss": 0.8955, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7784, 12 | "f1_score": 0.7741, 13 | "precision": 0.7904, 14 | "recall": 0.7913, 15 | "avg_loss": 0.9063, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7597, 20 | "f1_score": 0.7555, 21 | "precision": 0.762, 22 | "recall": 0.7712, 23 | "avg_loss": 0.8523, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/VocalSound.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8081, 4 | "f1_score": 0.8054, 5 | "precision": 0.8116, 6 | "recall": 0.8082, 7 | "avg_loss": 0.6976, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7825, 12 | "f1_score": 0.7759, 13 | "precision": 0.7935, 14 | "recall": 0.7826, 15 | "avg_loss": 0.7931, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7463, 20 | "f1_score": 0.7451, 21 | "precision": 0.7516, 22 | "recall": 0.7463, 23 | "avg_loss": 0.7528, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/Beijing-Opera.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9323, 4 | "f1_score": 0.9309, 5 | "precision": 0.9429, 6 | "recall": 0.9341, 7 | "avg_loss": 0.1794, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.966, 12 | "f1_score": 0.965, 13 | "precision": 0.9661, 14 | "recall": 0.9671, 15 | "avg_loss": 0.1301, 16 | "epoch": 49 17 | }, 18 | 
"seed_2": { 19 | "accuracy": 0.9619, 20 | "f1_score": 0.9605, 21 | "precision": 0.9617, 22 | "recall": 0.9633, 23 | "avg_loss": 0.1255, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/ESC50-Actions-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.975, 4 | "f1_score": 0.9749, 5 | "precision": 0.9778, 6 | "recall": 0.975, 7 | "avg_loss": 0.0926, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.925, 12 | "f1_score": 0.9255, 13 | "precision": 0.9314, 14 | "recall": 0.925, 15 | "avg_loss": 0.182, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.925, 20 | "f1_score": 0.9182, 21 | "precision": 0.9504, 22 | "recall": 0.925, 23 | "avg_loss": 0.2313, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/ESC50-Actions-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.95, 4 | "f1_score": 0.9492, 5 | "precision": 0.9533, 6 | "recall": 0.95, 7 | "avg_loss": 0.1188, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.925, 12 | "f1_score": 0.9213, 13 | "precision": 0.9327, 14 | "recall": 0.925, 15 | "avg_loss": 0.2505, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9125, 20 | "f1_score": 0.9119, 21 | "precision": 0.9203, 22 | "recall": 0.9125, 23 | "avg_loss": 0.228, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/GT-Music-Genre.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.725, 4 | "f1_score": 0.719, 5 | "precision": 0.7102, 6 | "recall": 0.7397, 7 | "avg_loss": 0.7874, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.695, 12 | "f1_score": 0.6937, 13 | "precision": 
0.7301, 14 | "recall": 0.7066, 15 | "avg_loss": 0.9331, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.735, 20 | "f1_score": 0.7153, 21 | "precision": 0.7331, 22 | "recall": 0.7532, 23 | "avg_loss": 0.8354, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/TUT2017-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.5829, 4 | "f1_score": 0.5675, 5 | "precision": 0.5914, 6 | "recall": 0.5829, 7 | "avg_loss": 1.1393, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.5991, 12 | "f1_score": 0.5936, 13 | "precision": 0.6136, 14 | "recall": 0.5991, 15 | "avg_loss": 1.1859, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.5709, 20 | "f1_score": 0.5566, 21 | "precision": 0.5736, 22 | "recall": 0.5709, 23 | "avg_loss": 1.2427, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/TUT2017-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.649, 4 | "f1_score": 0.6452, 5 | "precision": 0.6799, 6 | "recall": 0.6489, 7 | "avg_loss": 0.9844, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.6661, 12 | "f1_score": 0.6516, 13 | "precision": 0.6762, 14 | "recall": 0.6661, 15 | "avg_loss": 0.93, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.6607, 20 | "f1_score": 0.6422, 21 | "precision": 0.6616, 22 | "recall": 0.6607, 23 | "avg_loss": 0.9242, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/TUT2017-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6467, 4 | "f1_score": 0.6406, 5 | "precision": 0.6513, 6 | "recall": 0.6467, 7 | "avg_loss": 0.989, 8 | "epoch": 49 9 | }, 10 | 
"seed_1": { 11 | "accuracy": 0.7046, 12 | "f1_score": 0.7018, 13 | "precision": 0.7207, 14 | "recall": 0.7046, 15 | "avg_loss": 0.9182, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7012, 20 | "f1_score": 0.6994, 21 | "precision": 0.7085, 22 | "recall": 0.7012, 23 | "avg_loss": 0.8826, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/TUT2017-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6778, 4 | "f1_score": 0.6727, 5 | "precision": 0.6871, 6 | "recall": 0.6778, 7 | "avg_loss": 0.9506, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.6969, 12 | "f1_score": 0.6914, 13 | "precision": 0.6993, 14 | "recall": 0.6969, 15 | "avg_loss": 0.9087, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.6772, 20 | "f1_score": 0.6656, 21 | "precision": 0.6856, 22 | "recall": 0.6772, 23 | "avg_loss": 0.9949, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/UrbanSound8K.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.76, 4 | "f1_score": 0.7577, 5 | "precision": 0.7704, 6 | "recall": 0.7707, 7 | "avg_loss": 0.8501, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7378, 12 | "f1_score": 0.7382, 13 | "precision": 0.7493, 14 | "recall": 0.7549, 15 | "avg_loss": 0.8992, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7666, 20 | "f1_score": 0.761, 21 | "precision": 0.7694, 22 | "recall": 0.7758, 23 | "avg_loss": 0.8789, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/Beijing-Opera.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9747, 4 | "f1_score": 0.9738, 5 | "precision": 
0.9773, 6 | "recall": 0.9752, 7 | "avg_loss": 0.3628, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9066, 12 | "f1_score": 0.9046, 13 | "precision": 0.92, 14 | "recall": 0.9069, 15 | "avg_loss": 0.4189, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9787, 20 | "f1_score": 0.9786, 21 | "precision": 0.9797, 22 | "recall": 0.9793, 23 | "avg_loss": 0.0881, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/ESC50-Actions-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.975, 4 | "f1_score": 0.9749, 5 | "precision": 0.9778, 6 | "recall": 0.975, 7 | "avg_loss": 0.3221, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.95, 12 | "f1_score": 0.9489, 13 | "precision": 0.9542, 14 | "recall": 0.95, 15 | "avg_loss": 0.4286, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9375, 20 | "f1_score": 0.9364, 21 | "precision": 0.9431, 22 | "recall": 0.9375, 23 | "avg_loss": 0.5072, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/ESC50-Actions-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.975, 4 | "f1_score": 0.975, 5 | "precision": 0.9764, 6 | "recall": 0.975, 7 | "avg_loss": 0.1443, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.975, 12 | "f1_score": 0.9749, 13 | "precision": 0.9778, 14 | "recall": 0.975, 15 | "avg_loss": 0.0247, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.95, 20 | "f1_score": 0.9497, 21 | "precision": 0.9546, 22 | "recall": 0.95, 23 | "avg_loss": 0.2049, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/GT-Music-Genre.json: -------------------------------------------------------------------------------- 1 | { 
2 | "seed_0": { 3 | "accuracy": 0.79, 4 | "f1_score": 0.7914, 5 | "precision": 0.7952, 6 | "recall": 0.7964, 7 | "avg_loss": 1.6149, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.785, 12 | "f1_score": 0.7888, 13 | "precision": 0.8077, 14 | "recall": 0.7902, 15 | "avg_loss": 1.4901, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.825, 20 | "f1_score": 0.8257, 21 | "precision": 0.8287, 22 | "recall": 0.8298, 23 | "avg_loss": 1.7199, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/NS-Instruments.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6394, 4 | "f1_score": 0.6418, 5 | "precision": 0.641, 6 | "recall": 0.6854, 7 | "avg_loss": 3.9794, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.6108, 12 | "f1_score": 0.6062, 13 | "precision": 0.6166, 14 | "recall": 0.6508, 15 | "avg_loss": 4.4305, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.6648, 20 | "f1_score": 0.6834, 21 | "precision": 0.6709, 22 | "recall": 0.7294, 23 | "avg_loss": 2.6739, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/TUT2017-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6897, 4 | "f1_score": 0.6849, 5 | "precision": 0.6986, 6 | "recall": 0.6897, 7 | "avg_loss": 1.6695, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7239, 12 | "f1_score": 0.7201, 13 | "precision": 0.7299, 14 | "recall": 0.7239, 15 | "avg_loss": 1.3417, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7077, 20 | "f1_score": 0.6943, 21 | "precision": 0.6996, 22 | "recall": 0.7077, 23 | "avg_loss": 1.3855, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/TUT2017-FOLD2.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8067, 4 | "f1_score": 0.8073, 5 | "precision": 0.8207, 6 | "recall": 0.8066, 7 | "avg_loss": 0.8389, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8215, 12 | "f1_score": 0.8194, 13 | "precision": 0.825, 14 | "recall": 0.8216, 15 | "avg_loss": 0.8609, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8007, 20 | "f1_score": 0.8013, 21 | "precision": 0.8094, 22 | "recall": 0.8007, 23 | "avg_loss": 0.9164, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/TUT2017-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8586, 4 | "f1_score": 0.8578, 5 | "precision": 0.8597, 6 | "recall": 0.8586, 7 | "avg_loss": 0.5988, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8383, 12 | "f1_score": 0.8369, 13 | "precision": 0.8433, 14 | "recall": 0.8383, 15 | "avg_loss": 0.8137, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7725, 20 | "f1_score": 0.7826, 21 | "precision": 0.8053, 22 | "recall": 0.7725, 23 | "avg_loss": 1.0751, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/TUT2017-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8288, 4 | "f1_score": 0.8268, 5 | "precision": 0.85, 6 | "recall": 0.8288, 7 | "avg_loss": 0.8601, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.835, 12 | "f1_score": 0.8331, 13 | "precision": 0.8377, 14 | "recall": 0.835, 15 | "avg_loss": 0.8274, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8105, 20 | "f1_score": 0.8096, 21 | "precision": 0.8242, 22 | "recall": 0.8105, 23 | "avg_loss": 1.0036, 24 | "epoch": 49 25 | } 26 | } 
-------------------------------------------------------------------------------- /logs/palm/UrbanSound8K.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.812, 4 | "f1_score": 0.8158, 5 | "precision": 0.8271, 6 | "recall": 0.8243, 7 | "avg_loss": 1.7177, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8037, 12 | "f1_score": 0.8071, 13 | "precision": 0.8151, 14 | "recall": 0.817, 15 | "avg_loss": 1.8778, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8074, 20 | "f1_score": 0.8101, 21 | "precision": 0.8198, 22 | "recall": 0.8185, 23 | "avg_loss": 1.7769, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/ESC50-Actions-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9375, 4 | "f1_score": 0.9382, 5 | "precision": 0.9505, 6 | "recall": 0.9375, 7 | "avg_loss": 0.1149, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.975, 12 | "f1_score": 0.9749, 13 | "precision": 0.9778, 14 | "recall": 0.975, 15 | "avg_loss": 0.062, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9625, 20 | "f1_score": 0.9621, 21 | "precision": 0.9689, 22 | "recall": 0.9625, 23 | "avg_loss": 0.1324, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/ESC50-Actions-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9875, 4 | "f1_score": 0.9875, 5 | "precision": 0.9889, 6 | "recall": 0.9875, 7 | "avg_loss": 0.0455, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9875, 12 | "f1_score": 0.9875, 13 | "precision": 0.9889, 14 | "recall": 0.9875, 15 | "avg_loss": 0.0669, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.95, 20 | "f1_score": 0.9498, 21 | 
"precision": 0.9616, 22 | "recall": 0.95, 23 | "avg_loss": 0.1235, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/ESC50-Actions-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.975, 4 | "f1_score": 0.9749, 5 | "precision": 0.9778, 6 | "recall": 0.975, 7 | "avg_loss": 0.0466, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.95, 12 | "f1_score": 0.9495, 13 | "precision": 0.9578, 14 | "recall": 0.95, 15 | "avg_loss": 0.0982, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9625, 20 | "f1_score": 0.9624, 21 | "precision": 0.9667, 22 | "recall": 0.9625, 23 | "avg_loss": 0.0966, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/NS-Instruments.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.5996, 4 | "f1_score": 0.6199, 5 | "precision": 0.6246, 6 | "recall": 0.6578, 7 | "avg_loss": 1.6584, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.574, 12 | "f1_score": 0.584, 13 | "precision": 0.6134, 14 | "recall": 0.6306, 15 | "avg_loss": 1.6444, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.6438, 20 | "f1_score": 0.6417, 21 | "precision": 0.6403, 22 | "recall": 0.6877, 23 | "avg_loss": 1.4087, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/TUT2017-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7796, 4 | "f1_score": 0.7779, 5 | "precision": 0.7881, 6 | "recall": 0.7796, 7 | "avg_loss": 0.6173, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7217, 12 | "f1_score": 0.7193, 13 | "precision": 0.7555, 14 | "recall": 0.7217, 15 | "avg_loss": 0.8156, 16 | 
"epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7496, 20 | "f1_score": 0.7426, 21 | "precision": 0.7647, 22 | "recall": 0.7497, 23 | "avg_loss": 0.6913, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/UrbanSound8K-FOLD10.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.81, 4 | "f1_score": 0.8011, 5 | "precision": 0.7986, 6 | "recall": 0.8242, 7 | "avg_loss": 0.7701, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7252, 12 | "f1_score": 0.7244, 13 | "precision": 0.7523, 14 | "recall": 0.7459, 15 | "avg_loss": 0.953, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8076, 20 | "f1_score": 0.8007, 21 | "precision": 0.8031, 22 | "recall": 0.8181, 23 | "avg_loss": 0.7598, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/UrbanSound8K-FOLD9.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7549, 4 | "f1_score": 0.7627, 5 | "precision": 0.7798, 6 | "recall": 0.7789, 7 | "avg_loss": 1.1373, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8554, 12 | "f1_score": 0.8661, 13 | "precision": 0.874, 14 | "recall": 0.872, 15 | "avg_loss": 0.6662, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7659, 20 | "f1_score": 0.7651, 21 | "precision": 0.7795, 22 | "recall": 0.7904, 23 | "avg_loss": 1.0182, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/Beijing-Opera-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8958, 4 | "f1_score": 0.8942, 5 | "precision": 0.909, 6 | "recall": 0.9006, 7 | "avg_loss": 0.2626, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9792, 12 
| "f1_score": 0.9781, 13 | "precision": 0.9773, 14 | "recall": 0.9808, 15 | "avg_loss": 0.0817, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9583, 20 | "f1_score": 0.9564, 21 | "precision": 0.9583, 22 | "recall": 0.9615, 23 | "avg_loss": 0.162, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/Beijing-Opera-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9574, 4 | "f1_score": 0.9564, 5 | "precision": 0.9583, 6 | "recall": 0.9599, 7 | "avg_loss": 0.1015, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9787, 12 | "f1_score": 0.9799, 13 | "precision": 0.9821, 14 | "recall": 0.9792, 15 | "avg_loss": 0.0963, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9787, 20 | "f1_score": 0.98, 21 | "precision": 0.9808, 22 | "recall": 0.9808, 23 | "avg_loss": 0.1062, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/Beijing-Opera-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9149, 4 | "f1_score": 0.9148, 5 | "precision": 0.9286, 6 | "recall": 0.9199, 7 | "avg_loss": 0.1992, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9362, 12 | "f1_score": 0.9334, 13 | "precision": 0.9353, 14 | "recall": 0.9349, 15 | "avg_loss": 0.18, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9787, 20 | "f1_score": 0.9772, 21 | "precision": 0.9773, 22 | "recall": 0.9792, 23 | "avg_loss": 0.0778, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/ESC50-Actions-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9375, 4 | "f1_score": 0.937, 5 | "precision": 0.9467, 6 | "recall": 
0.9375, 7 | "avg_loss": 0.1948, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.925, 12 | "f1_score": 0.9235, 13 | "precision": 0.9356, 14 | "recall": 0.925, 15 | "avg_loss": 0.1893, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9375, 20 | "f1_score": 0.936, 21 | "precision": 0.9467, 22 | "recall": 0.9375, 23 | "avg_loss": 0.1906, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/NS-Instruments.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.5728, 4 | "f1_score": 0.6028, 5 | "precision": 0.6047, 6 | "recall": 0.6526, 7 | "avg_loss": 1.7179, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.5562, 12 | "f1_score": 0.5593, 13 | "precision": 0.5862, 14 | "recall": 0.5856, 15 | "avg_loss": 1.7017, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.6177, 20 | "f1_score": 0.6153, 21 | "precision": 0.6119, 22 | "recall": 0.6627, 23 | "avg_loss": 1.3542, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/UrbanSound8K-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7789, 4 | "f1_score": 0.7811, 5 | "precision": 0.7993, 6 | "recall": 0.7941, 7 | "avg_loss": 0.6321, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7468, 12 | "f1_score": 0.7454, 13 | "precision": 0.7532, 14 | "recall": 0.7691, 15 | "avg_loss": 0.784, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7732, 20 | "f1_score": 0.7745, 21 | "precision": 0.7721, 22 | "recall": 0.7871, 23 | "avg_loss": 0.7014, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/UrbanSound8K-FOLD10.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"seed_0": { 3 | "accuracy": 0.7754, 4 | "f1_score": 0.7775, 5 | "precision": 0.7839, 6 | "recall": 0.7903, 7 | "avg_loss": 0.8331, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.6941, 12 | "f1_score": 0.702, 13 | "precision": 0.7414, 14 | "recall": 0.719, 15 | "avg_loss": 1.1581, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7873, 20 | "f1_score": 0.7839, 21 | "precision": 0.7862, 22 | "recall": 0.8029, 23 | "avg_loss": 0.7831, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/UrbanSound8K-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7928, 4 | "f1_score": 0.7821, 5 | "precision": 0.7988, 6 | "recall": 0.7979, 7 | "avg_loss": 0.7417, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7917, 12 | "f1_score": 0.7963, 13 | "precision": 0.8064, 14 | "recall": 0.8116, 15 | "avg_loss": 0.6845, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8108, 20 | "f1_score": 0.8096, 21 | "precision": 0.8095, 22 | "recall": 0.8131, 23 | "avg_loss": 0.6086, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/UrbanSound8K-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7243, 4 | "f1_score": 0.7125, 5 | "precision": 0.7168, 6 | "recall": 0.7242, 7 | "avg_loss": 0.926, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.6995, 12 | "f1_score": 0.7023, 13 | "precision": 0.7035, 14 | "recall": 0.7081, 15 | "avg_loss": 1.1094, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7341, 20 | "f1_score": 0.7193, 21 | "precision": 0.7403, 22 | "recall": 0.7325, 23 | "avg_loss": 1.0645, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- 
/logs/coop/UrbanSound8K-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8131, 4 | "f1_score": 0.8032, 5 | "precision": 0.8217, 6 | "recall": 0.8023, 7 | "avg_loss": 0.5833, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7273, 12 | "f1_score": 0.7297, 13 | "precision": 0.7545, 14 | "recall": 0.735, 15 | "avg_loss": 0.9209, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7596, 20 | "f1_score": 0.7556, 21 | "precision": 0.7802, 22 | "recall": 0.7483, 23 | "avg_loss": 0.8186, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/UrbanSound8K-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7917, 4 | "f1_score": 0.782, 5 | "precision": 0.8052, 6 | "recall": 0.8015, 7 | "avg_loss": 0.6773, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.765, 12 | "f1_score": 0.7591, 13 | "precision": 0.7727, 14 | "recall": 0.7773, 15 | "avg_loss": 0.7294, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8184, 20 | "f1_score": 0.8211, 21 | "precision": 0.8255, 22 | "recall": 0.8275, 23 | "avg_loss": 0.609, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/UrbanSound8K-FOLD6.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6865, 4 | "f1_score": 0.672, 5 | "precision": 0.6791, 6 | "recall": 0.7058, 7 | "avg_loss": 1.3617, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7217, 12 | "f1_score": 0.7164, 13 | "precision": 0.7172, 14 | "recall": 0.7301, 15 | "avg_loss": 1.0431, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7193, 20 | "f1_score": 0.7102, 21 | "precision": 0.7213, 22 | "recall": 0.7253, 23 | "avg_loss": 1.3693, 24 | "epoch": 49 
25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/UrbanSound8K-FOLD7.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7446, 4 | "f1_score": 0.7334, 5 | "precision": 0.7425, 6 | "recall": 0.7478, 7 | "avg_loss": 0.7803, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7566, 12 | "f1_score": 0.7427, 13 | "precision": 0.7394, 14 | "recall": 0.7714, 15 | "avg_loss": 0.7967, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7041, 20 | "f1_score": 0.6876, 21 | "precision": 0.6949, 22 | "recall": 0.7179, 23 | "avg_loss": 0.9885, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/UrbanSound8K-FOLD8.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7109, 4 | "f1_score": 0.7294, 5 | "precision": 0.7394, 6 | "recall": 0.7349, 7 | "avg_loss": 1.0237, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.6737, 12 | "f1_score": 0.6752, 13 | "precision": 0.6923, 14 | "recall": 0.7022, 15 | "avg_loss": 1.0644, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7494, 20 | "f1_score": 0.7349, 21 | "precision": 0.7359, 22 | "recall": 0.7768, 23 | "avg_loss": 0.9744, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/UrbanSound8K-FOLD9.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7819, 4 | "f1_score": 0.804, 5 | "precision": 0.817, 6 | "recall": 0.8087, 7 | "avg_loss": 0.9417, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8015, 12 | "f1_score": 0.8127, 13 | "precision": 0.8125, 14 | "recall": 0.8252, 15 | "avg_loss": 0.7012, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.81, 20 | "f1_score": 
0.8133, 21 | "precision": 0.8281, 22 | "recall": 0.8267, 23 | "avg_loss": 0.8716, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/ESC50-Actions-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9375, 4 | "f1_score": 0.9368, 5 | "precision": 0.9556, 6 | "recall": 0.9375, 7 | "avg_loss": 0.5431, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9875, 12 | "f1_score": 0.9875, 13 | "precision": 0.9889, 14 | "recall": 0.9875, 15 | "avg_loss": 0.05, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9625, 20 | "f1_score": 0.9614, 21 | "precision": 0.9667, 22 | "recall": 0.9625, 23 | "avg_loss": 0.1677, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/ESC50-Actions-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9625, 4 | "f1_score": 0.9621, 5 | "precision": 0.9689, 6 | "recall": 0.9625, 7 | "avg_loss": 0.2894, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9125, 12 | "f1_score": 0.9113, 13 | "precision": 0.9288, 14 | "recall": 0.9125, 15 | "avg_loss": 0.4741, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.975, 20 | "f1_score": 0.9746, 21 | "precision": 0.98, 22 | "recall": 0.975, 23 | "avg_loss": 0.2188, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/UrbanSound8K-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8133, 4 | "f1_score": 0.8277, 5 | "precision": 0.8349, 6 | "recall": 0.8318, 7 | "avg_loss": 1.4857, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8373, 12 | "f1_score": 0.8339, 13 | "precision": 0.8351, 14 | "recall": 0.8554, 15 | 
"avg_loss": 1.4359, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7973, 20 | "f1_score": 0.8035, 21 | "precision": 0.8055, 22 | "recall": 0.8186, 23 | "avg_loss": 1.2868, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/UrbanSound8K-FOLD10.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8435, 4 | "f1_score": 0.8489, 5 | "precision": 0.8482, 6 | "recall": 0.8583, 7 | "avg_loss": 1.0822, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7384, 12 | "f1_score": 0.7407, 13 | "precision": 0.7672, 14 | "recall": 0.7564, 15 | "avg_loss": 2.1358, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8124, 20 | "f1_score": 0.8161, 21 | "precision": 0.8294, 22 | "recall": 0.8302, 23 | "avg_loss": 1.544, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/UrbanSound8K-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8041, 4 | "f1_score": 0.8121, 5 | "precision": 0.8225, 6 | "recall": 0.8243, 7 | "avg_loss": 1.8437, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8435, 12 | "f1_score": 0.8508, 13 | "precision": 0.8504, 14 | "recall": 0.8615, 15 | "avg_loss": 0.9204, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8559, 20 | "f1_score": 0.8643, 21 | "precision": 0.8756, 22 | "recall": 0.8661, 23 | "avg_loss": 1.2119, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/UrbanSound8K-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7395, 4 | "f1_score": 0.7352, 5 | "precision": 0.7541, 6 | "recall": 0.7428, 7 | "avg_loss": 1.9464, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 
| "accuracy": 0.7438, 12 | "f1_score": 0.7564, 13 | "precision": 0.7773, 14 | "recall": 0.7535, 15 | "avg_loss": 2.3971, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7146, 20 | "f1_score": 0.7071, 21 | "precision": 0.7215, 22 | "recall": 0.7158, 23 | "avg_loss": 2.7911, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/UrbanSound8K-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8687, 4 | "f1_score": 0.8619, 5 | "precision": 0.8801, 6 | "recall": 0.8571, 7 | "avg_loss": 0.9007, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8384, 12 | "f1_score": 0.8364, 13 | "precision": 0.8466, 14 | "recall": 0.8365, 15 | "avg_loss": 1.2304, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8485, 20 | "f1_score": 0.8425, 21 | "precision": 0.8637, 22 | "recall": 0.8399, 23 | "avg_loss": 1.466, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/UrbanSound8K-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8419, 4 | "f1_score": 0.8357, 5 | "precision": 0.8597, 6 | "recall": 0.8506, 7 | "avg_loss": 1.1243, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8697, 12 | "f1_score": 0.8677, 13 | "precision": 0.8678, 14 | "recall": 0.8766, 15 | "avg_loss": 0.967, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8291, 20 | "f1_score": 0.8319, 21 | "precision": 0.8485, 22 | "recall": 0.8374, 23 | "avg_loss": 0.9734, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/UrbanSound8K-FOLD6.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.819, 4 | "f1_score": 0.8182, 5 | "precision": 
0.8215, 6 | "recall": 0.8282, 7 | "avg_loss": 3.1229, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7789, 12 | "f1_score": 0.7856, 13 | "precision": 0.7962, 14 | "recall": 0.7885, 15 | "avg_loss": 3.9193, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7485, 20 | "f1_score": 0.7447, 21 | "precision": 0.7539, 22 | "recall": 0.7571, 23 | "avg_loss": 3.7871, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/UrbanSound8K-FOLD7.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7721, 4 | "f1_score": 0.7742, 5 | "precision": 0.7832, 6 | "recall": 0.7823, 7 | "avg_loss": 2.3182, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7995, 12 | "f1_score": 0.7954, 13 | "precision": 0.7974, 14 | "recall": 0.8059, 15 | "avg_loss": 1.4955, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8162, 20 | "f1_score": 0.8171, 21 | "precision": 0.8159, 22 | "recall": 0.824, 23 | "avg_loss": 1.8344, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/UrbanSound8K-FOLD8.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8102, 4 | "f1_score": 0.8298, 5 | "precision": 0.8463, 6 | "recall": 0.8329, 7 | "avg_loss": 1.1996, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7233, 12 | "f1_score": 0.7308, 13 | "precision": 0.7358, 14 | "recall": 0.7516, 15 | "avg_loss": 2.7501, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7866, 20 | "f1_score": 0.8035, 21 | "precision": 0.8115, 22 | "recall": 0.808, 23 | "avg_loss": 1.2759, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/UrbanSound8K-FOLD9.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8076, 4 | "f1_score": 0.8141, 5 | "precision": 0.8201, 6 | "recall": 0.835, 7 | "avg_loss": 2.1535, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.864, 12 | "f1_score": 0.8735, 13 | "precision": 0.8773, 14 | "recall": 0.8841, 15 | "avg_loss": 1.5268, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8652, 20 | "f1_score": 0.8707, 21 | "precision": 0.8723, 22 | "recall": 0.8877, 23 | "avg_loss": 1.598, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/Beijing-Opera-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9574, 4 | "f1_score": 0.9564, 5 | "precision": 0.9583, 6 | "recall": 0.9599, 7 | "avg_loss": 0.0868, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9787, 12 | "f1_score": 0.9772, 13 | "precision": 0.9773, 14 | "recall": 0.9792, 15 | "avg_loss": 0.0396, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9787, 20 | "f1_score": 0.9772, 21 | "precision": 0.9773, 22 | "recall": 0.9792, 23 | "avg_loss": 0.0403, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/UrbanSound8K-FOLD1.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7537, 4 | "f1_score": 0.7596, 5 | "precision": 0.7849, 6 | "recall": 0.7752, 7 | "avg_loss": 0.7558, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7526, 12 | "f1_score": 0.751, 13 | "precision": 0.7728, 14 | "recall": 0.7795, 15 | "avg_loss": 0.8645, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7388, 20 | "f1_score": 0.7397, 21 | "precision": 0.7537, 22 | "recall": 0.7655, 23 | "avg_loss": 0.7558, 24 | "epoch": 49 25 | } 26 | } 
-------------------------------------------------------------------------------- /logs/cocoop/UrbanSound8K-FOLD2.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7489, 4 | "f1_score": 0.7506, 5 | "precision": 0.7562, 6 | "recall": 0.7753, 7 | "avg_loss": 0.9968, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8637, 12 | "f1_score": 0.8643, 13 | "precision": 0.8656, 14 | "recall": 0.8715, 15 | "avg_loss": 0.5291, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8074, 20 | "f1_score": 0.8016, 21 | "precision": 0.8046, 22 | "recall": 0.8087, 23 | "avg_loss": 0.6422, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/UrbanSound8K-FOLD3.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6714, 4 | "f1_score": 0.6635, 5 | "precision": 0.6838, 6 | "recall": 0.6878, 7 | "avg_loss": 1.0453, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7492, 12 | "f1_score": 0.7517, 13 | "precision": 0.7647, 14 | "recall": 0.7504, 15 | "avg_loss": 0.9332, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7027, 20 | "f1_score": 0.6934, 21 | "precision": 0.6926, 22 | "recall": 0.7052, 23 | "avg_loss": 0.9017, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/UrbanSound8K-FOLD4.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7889, 4 | "f1_score": 0.7781, 5 | "precision": 0.7937, 6 | "recall": 0.781, 7 | "avg_loss": 0.6893, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7313, 12 | "f1_score": 0.7134, 13 | "precision": 0.7518, 14 | "recall": 0.7299, 15 | "avg_loss": 1.1393, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7586, 20 | "f1_score": 0.7523, 
21 | "precision": 0.7693, 22 | "recall": 0.7494, 23 | "avg_loss": 0.8274, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/UrbanSound8K-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8408, 4 | "f1_score": 0.8328, 5 | "precision": 0.8447, 6 | "recall": 0.8505, 7 | "avg_loss": 0.5257, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7788, 12 | "f1_score": 0.7589, 13 | "precision": 0.7926, 14 | "recall": 0.7887, 15 | "avg_loss": 0.7447, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.8109, 20 | "f1_score": 0.8168, 21 | "precision": 0.8254, 22 | "recall": 0.8221, 23 | "avg_loss": 0.5836, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/UrbanSound8K-FOLD6.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7886, 4 | "f1_score": 0.7851, 5 | "precision": 0.7945, 6 | "recall": 0.7998, 7 | "avg_loss": 0.855, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7327, 12 | "f1_score": 0.7281, 13 | "precision": 0.7453, 14 | "recall": 0.7462, 15 | "avg_loss": 1.6305, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.6902, 20 | "f1_score": 0.6733, 21 | "precision": 0.6809, 22 | "recall": 0.6929, 23 | "avg_loss": 1.2978, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/UrbanSound8K-FOLD7.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.7303, 4 | "f1_score": 0.7184, 5 | "precision": 0.7584, 6 | "recall": 0.7424, 7 | "avg_loss": 0.9887, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.8126, 12 | "f1_score": 0.8128, 13 | "precision": 0.8131, 14 | "recall": 0.8244, 15 | 
"avg_loss": 0.7321, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7792, 20 | "f1_score": 0.7775, 21 | "precision": 0.7745, 22 | "recall": 0.7965, 23 | "avg_loss": 0.7008, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/cocoop/UrbanSound8K-FOLD8.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.6886, 4 | "f1_score": 0.7292, 5 | "precision": 0.7442, 6 | "recall": 0.7249, 7 | "avg_loss": 1.1906, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.7829, 12 | "f1_score": 0.7703, 13 | "precision": 0.7714, 14 | "recall": 0.8049, 15 | "avg_loss": 0.8706, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.7357, 20 | "f1_score": 0.7342, 21 | "precision": 0.7368, 22 | "recall": 0.7631, 23 | "avg_loss": 1.0355, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/coop/Beijing-Opera-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.8936, 4 | "f1_score": 0.8892, 5 | "precision": 0.9184, 6 | "recall": 0.8899, 7 | "avg_loss": 0.2399, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 11 | "accuracy": 0.9574, 12 | "f1_score": 0.9564, 13 | "precision": 0.9583, 14 | "recall": 0.9615, 15 | "avg_loss": 0.1499, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9362, 20 | "f1_score": 0.9343, 21 | "precision": 0.9337, 22 | "recall": 0.9365, 23 | "avg_loss": 0.1762, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /logs/palm/Beijing-Opera-FOLD5.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed_0": { 3 | "accuracy": 0.9574, 4 | "f1_score": 0.9571, 5 | "precision": 0.9667, 6 | "recall": 0.9545, 7 | "avg_loss": 0.1089, 8 | "epoch": 49 9 | }, 10 | "seed_1": { 
11 | "accuracy": 0.8298, 12 | "f1_score": 0.8293, 13 | "precision": 0.8681, 14 | "recall": 0.8299, 15 | "avg_loss": 0.7705, 16 | "epoch": 49 17 | }, 18 | "seed_2": { 19 | "accuracy": 0.9574, 20 | "f1_score": 0.9557, 21 | "precision": 0.9571, 22 | "recall": 0.9558, 23 | "avg_loss": 0.0579, 24 | "epoch": 49 25 | } 26 | } -------------------------------------------------------------------------------- /pengi/configs/base.yml: -------------------------------------------------------------------------------- 1 | # TEXT ENCODER CONFIG 2 | use_text_model: True 3 | text_model: 'openai/clip-vit-base-patch16' 4 | transformer_embed_dim: 512 5 | freeze_text_encoder_weights: True 6 | use_pretrained_clap_weights: False 7 | 8 | # AUDIO ENCODER CONFIG 9 | audioenc_name: 'HTSAT' 10 | out_emb: 768 11 | fmin: 50 12 | fmax: 8000 13 | n_fft: 1024 14 | hop_size: 320 15 | mel_bins: 64 16 | window_size: 1024 17 | specaug: False 18 | mixup: False 19 | use_pretrained_audioencoder: False 20 | freeze_audio_encoder_weights: False 21 | 22 | # CLAP PROJECTION SPACE CONFIG 23 | d_proj: 1024 24 | 25 | # DATASET CONFIGS 26 | dataset_config: 27 | sampling_rate: 44100 28 | duration: 7 29 | enc_text_len: 40 30 | dec_text_len: 77 31 | 32 | # DECODER CONFIG 33 | text_decoder: 'gpt2' 34 | prefix_length: 40 35 | prefix_length_clip: 40 36 | mapping_type: 'transformer' 37 | num_layers: 8 38 | normalize_prefix: True 39 | freeze_gpt_weights: True -------------------------------------------------------------------------------- /pengi/configs/base_no_text_enc.yml: -------------------------------------------------------------------------------- 1 | # TEXT ENCODER CONFIG 2 | use_text_model: False 3 | text_model: 'openai/clip-vit-base-patch16' 4 | transformer_embed_dim: 512 5 | freeze_text_encoder_weights: True 6 | use_pretrained_clap_weights: False 7 | 8 | # AUDIO ENCODER CONFIG 9 | audioenc_name: 'HTSAT' 10 | out_emb: 768 11 | fmin: 50 12 | fmax: 8000 13 | n_fft: 1024 14 | hop_size: 320 15 | mel_bins: 64 16 | 
window_size: 1024 17 | specaug: False 18 | mixup: False 19 | use_pretrained_audioencoder: False 20 | freeze_audio_encoder_weights: False 21 | 22 | # CLAP PROJECTION SPACE CONFIG 23 | d_proj: 1024 24 | 25 | # DATASET CONFIGS 26 | dataset_config: 27 | sampling_rate: 44100 28 | duration: 7 29 | enc_text_len: 40 30 | dec_text_len: 77 31 | 32 | # DECODER CONFIG 33 | text_decoder: 'gpt2' 34 | prefix_length: 40 35 | prefix_length_clip: 40 36 | mapping_type: 'transformer' 37 | num_layers: 8 38 | normalize_prefix: True 39 | freeze_gpt_weights: True -------------------------------------------------------------------------------- /scripts/run_all_datasets_all_methods.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Run every method on every dataset: datasets in the outer loop, methods in the inner loop. 3 | for SCRIPT in beijing_opera crema_d esc50_actions esc50 gt_music_genre ns_instruments ravdess sesa tut urban_sound vocal_sound 4 | do 5 | for MODEL in zeroshot coop cocoop palm 6 | do 7 | sh scripts/$SCRIPT.sh $MODEL # each dataset script takes the method name as $1 8 | done 9 | done -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Asif Hanif 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining
a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /logs/coop/accuracy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Beijing-Opera": [ 3 | 0.9323, 4 | 0.966, 5 | 0.9619, 6 | 0.9533999999999999 7 | ], 8 | "CREMA-D": [ 9 | 0.313, 10 | 0.4197, 11 | 0.276, 12 | 0.3362333333333334 13 | ], 14 | "ESC50-Actions": [ 15 | 0.9625, 16 | 0.94, 17 | 0.955, 18 | 0.9525 19 | ], 20 | "ESC50": [ 21 | 0.941, 22 | 0.939, 23 | 0.9345, 24 | 0.9381666666666666 25 | ], 26 | "GT-Music-Genre": [ 27 | 0.725, 28 | 0.695, 29 | 0.735, 30 | 0.7183333333333333 31 | ], 32 | "NS-Instruments": [ 33 | 0.5728, 34 | 0.5562, 35 | 0.6177, 36 | 0.5822333333333334 37 | ], 38 | "RAVDESS": [ 39 | 0.3849, 40 | 0.2688, 41 | 0.3422, 42 | 0.3319666666666667 43 | ], 44 | "SESA": [ 45 | 0.9143, 46 | 0.8952, 47 | 0.8762, 48 | 0.8952333333333332 49 | ], 50 | "TUT2017": [ 51 | 0.6391, 52 | 0.6667, 53 | 0.6525, 54 | 0.6527666666666666 55 | ], 56 | "UrbanSound8K": [ 57 | 0.76, 58 | 0.7378, 59 | 0.7666, 60 | 0.7548 61 | ], 62 | "VocalSound": [ 63 | 0.7162, 64 | 0.7485, 65 | 0.6642, 66 | 0.7096333333333334 67 | ] 68 | } -------------------------------------------------------------------------------- /logs/palm/accuracy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Beijing-Opera": [ 3 | 0.9747, 4 | 0.9066, 5 | 0.9787, 6 | 0.9533333333333333 7 | ], 8 | "CREMA-D": [ 9 | 0.4453, 10 | 0.358, 11 | 0.2344, 12 | 0.34589999999999993 13 | ], 14 | "ESC50-Actions": [ 15 | 0.97, 16 | 0.9625, 17 | 0.965, 18 | 0.9658333333333333 19 | ], 20 | "ESC50": [ 21 | 0.956, 22 | 0.96, 23 | 0.962, 24 | 0.9593333333333334 25 | ], 26 | "GT-Music-Genre": [ 27 | 0.79, 28 | 0.785, 29 | 0.825, 30 | 0.8000000000000002 31 | ], 32 | "NS-Instruments": [ 33 | 0.6394, 34 | 0.6108, 35 | 0.6648, 36 | 0.6383333333333333 37 | ], 38 | "RAVDESS": [ 39 | 0.4562, 40 | 0.4603, 41 | 0.4623, 42 | 0.4596 43 | ], 44 | "SESA": [ 45 | 
0.8857, 46 | 0.9143, 47 | 0.8857, 48 | 0.8952333333333334 49 | ], 50 | "TUT2017": [ 51 | 0.7959, 52 | 0.8047, 53 | 0.7729, 54 | 0.7911666666666667 55 | ], 56 | "UrbanSound8K": [ 57 | 0.812, 58 | 0.8037, 59 | 0.8074, 60 | 0.8077 61 | ], 62 | "VocalSound": [ 63 | 0.8101, 64 | 0.8168, 65 | 0.7964, 66 | 0.8077666666666667 67 | ] 68 | } -------------------------------------------------------------------------------- /logs/cocoop/accuracy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Beijing-Opera": [ 3 | 0.9577, 4 | 0.983, 5 | 0.9916, 6 | 0.9774333333333334 7 | ], 8 | "CREMA-D": [ 9 | 0.2539, 10 | 0.3358, 11 | 0.3156, 12 | 0.3017666666666667 13 | ], 14 | "ESC50-Actions": [ 15 | 0.97, 16 | 0.9625, 17 | 0.965, 18 | 0.9658333333333333 19 | ], 20 | "ESC50": [ 21 | 0.946, 22 | 0.937, 23 | 0.945, 24 | 0.9426666666666667 25 | ], 26 | "GT-Music-Genre": [ 27 | 0.75, 28 | 0.745, 29 | 0.76, 30 | 0.7516666666666666 31 | ], 32 | "NS-Instruments": [ 33 | 0.5996, 34 | 0.574, 35 | 0.6438, 36 | 0.6058 37 | ], 38 | "RAVDESS": [ 39 | 0.3727, 40 | 0.4399, 41 | 0.3523, 42 | 0.38830000000000003 43 | ], 44 | "SESA": [ 45 | 0.8381, 46 | 0.8762, 47 | 0.8952, 48 | 0.8698333333333332 49 | ], 50 | "TUT2017": [ 51 | 0.7499, 52 | 0.7215, 53 | 0.7312, 54 | 0.7342 55 | ], 56 | "UrbanSound8K": [ 57 | 0.7576, 58 | 0.7784, 59 | 0.7597, 60 | 0.7652333333333333 61 | ], 62 | "VocalSound": [ 63 | 0.8081, 64 | 0.7825, 65 | 0.7463, 66 | 0.7789666666666667 67 | ] 68 | } -------------------------------------------------------------------------------- /logs/coop/f1_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "Beijing-Opera": [ 3 | 0.9309, 4 | 0.965, 5 | 0.9605, 6 | 0.9521333333333333 7 | ], 8 | "CREMA-D": [ 9 | 0.2609, 10 | 0.2745, 11 | 0.2389, 12 | 0.25810000000000005 13 | ], 14 | "ESC50-Actions": [ 15 | 0.9618, 16 | 0.939, 17 | 0.9532, 18 | 0.9513333333333334 19 | ], 20 | "ESC50": [ 21 | 0.9399, 22 | 
0.9369, 23 | 0.9335, 24 | 0.9367666666666666 25 | ], 26 | "GT-Music-Genre": [ 27 | 0.719, 28 | 0.6937, 29 | 0.7153, 30 | 0.7093333333333334 31 | ], 32 | "NS-Instruments": [ 33 | 0.6028, 34 | 0.5593, 35 | 0.6153, 36 | 0.5924666666666667 37 | ], 38 | "RAVDESS": [ 39 | 0.3668, 40 | 0.2385, 41 | 0.3075, 42 | 0.30426666666666663 43 | ], 44 | "SESA": [ 45 | 0.9044, 46 | 0.8959, 47 | 0.8639, 48 | 0.8880666666666667 49 | ], 50 | "TUT2017": [ 51 | 0.6315, 52 | 0.6596, 53 | 0.6409, 54 | 0.644 55 | ], 56 | "UrbanSound8K": [ 57 | 0.7577, 58 | 0.7382, 59 | 0.761, 60 | 0.7523 61 | ], 62 | "VocalSound": [ 63 | 0.6953, 64 | 0.7481, 65 | 0.6606, 66 | 0.7013333333333334 67 | ] 68 | } -------------------------------------------------------------------------------- /logs/palm/f1_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "Beijing-Opera": [ 3 | 0.9738, 4 | 0.9046, 5 | 0.9786, 6 | 0.9523333333333334 7 | ], 8 | "CREMA-D": [ 9 | 0.3277, 10 | 0.3083, 11 | 0.199, 12 | 0.2783333333333333 13 | ], 14 | "ESC50-Actions": [ 15 | 0.9698, 16 | 0.962, 17 | 0.9644, 18 | 0.9653999999999999 19 | ], 20 | "ESC50": [ 21 | 0.9544, 22 | 0.9591, 23 | 0.9614, 24 | 0.9583 25 | ], 26 | "GT-Music-Genre": [ 27 | 0.7914, 28 | 0.7888, 29 | 0.8257, 30 | 0.8019666666666666 31 | ], 32 | "NS-Instruments": [ 33 | 0.6418, 34 | 0.6062, 35 | 0.6834, 36 | 0.6438 37 | ], 38 | "RAVDESS": [ 39 | 0.4486, 40 | 0.4718, 41 | 0.4435, 42 | 0.4546333333333334 43 | ], 44 | "SESA": [ 45 | 0.8955, 46 | 0.9197, 47 | 0.8805, 48 | 0.8985666666666666 49 | ], 50 | "TUT2017": [ 51 | 0.7942, 52 | 0.8024, 53 | 0.772, 54 | 0.7895333333333333 55 | ], 56 | "UrbanSound8K": [ 57 | 0.8158, 58 | 0.8071, 59 | 0.8101, 60 | 0.8109999999999999 61 | ], 62 | "VocalSound": [ 63 | 0.8107, 64 | 0.8179, 65 | 0.7972, 66 | 0.8086000000000001 67 | ] 68 | } -------------------------------------------------------------------------------- /logs/cocoop/f1_score.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "Beijing-Opera": [ 3 | 0.9556, 4 | 0.9824, 5 | 0.991, 6 | 0.9763333333333334 7 | ], 8 | "CREMA-D": [ 9 | 0.2267, 10 | 0.2681, 11 | 0.2851, 12 | 0.2599666666666667 13 | ], 14 | "ESC50-Actions": [ 15 | 0.9703, 16 | 0.9621, 17 | 0.9648, 18 | 0.9657333333333332 19 | ], 20 | "ESC50": [ 21 | 0.9448, 22 | 0.9351, 23 | 0.9431, 24 | 0.9410000000000002 25 | ], 26 | "GT-Music-Genre": [ 27 | 0.7383, 28 | 0.7417, 29 | 0.7623, 30 | 0.7474333333333334 31 | ], 32 | "NS-Instruments": [ 33 | 0.6199, 34 | 0.584, 35 | 0.6417, 36 | 0.6152000000000001 37 | ], 38 | "RAVDESS": [ 39 | 0.3634, 40 | 0.4351, 41 | 0.3436, 42 | 0.38070000000000004 43 | ], 44 | "SESA": [ 45 | 0.8425, 46 | 0.8821, 47 | 0.8847, 48 | 0.8697666666666667 49 | ], 50 | "TUT2017": [ 51 | 0.7474, 52 | 0.7198, 53 | 0.7246, 54 | 0.7306 55 | ], 56 | "UrbanSound8K": [ 57 | 0.7581, 58 | 0.7741, 59 | 0.7555, 60 | 0.7625666666666667 61 | ], 62 | "VocalSound": [ 63 | 0.8054, 64 | 0.7759, 65 | 0.7451, 66 | 0.7754666666666666 67 | ] 68 | } -------------------------------------------------------------------------------- /scripts/sesa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DATASET="SESA" 3 | METHOD=$1 4 | 5 | 6 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then 7 | echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']" 8 | exit 1 9 | fi 10 | 11 | echo "Running METHOD=$METHOD on DATASET=$DATASET" 12 | 13 | DATASET_ROOT="/Audio-Datasets/$DATASET" 14 | 15 | if [ -d "$DATASET_ROOT" ]; then 16 | echo "Dataset path exists: $DATASET_ROOT" 17 | else 18 | echo "Dataset path does not exist. 
Please set the correct path to the dataset root directory in variable DATASET_ROOT"; exit 1 # abort: do not launch main.py on a missing dataset 19 | fi 20 | 21 | 22 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then 23 | CTX_DIM=512 24 | else 25 | CTX_DIM=1024 26 | fi 27 | 28 | 29 | if [ "$METHOD" = "zeroshot" ]; then 30 | SEEDS=0 31 | else 32 | SEEDS="0 1 2" 33 | fi 34 | 35 | 36 | 37 | for SEED in $SEEDS 38 | do 39 | python main.py \ 40 | --model_name "$METHOD" \ 41 | --dataset_root "$DATASET_ROOT" \ 42 | --n_epochs 50 \ 43 | --freq_test_model 10 \ 44 | --ctx_dim "$CTX_DIM" \ 45 | --batch_size 16 \ 46 | --lr 0.05 \ 47 | --seed "$SEED" \ 48 | --exp_name "$DATASET" \ 49 | --num_shots 16 \ 50 | --do_logging 51 | done -------------------------------------------------------------------------------- /scripts/crema_d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DATASET="CREMA-D" 3 | METHOD=$1 4 | 5 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then 6 | echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']" 7 | exit 1 8 | fi 9 | 10 | echo "Running METHOD=$METHOD on DATASET=$DATASET" 11 | 12 | DATASET_ROOT="/Audio-Datasets/$DATASET" 13 | 14 | if [ -d "$DATASET_ROOT" ]; then 15 | echo "Dataset path exists: $DATASET_ROOT" 16 | else 17 | echo "Dataset path does not exist.
Please set the correct path to the dataset root directory in variable DATASET_ROOT"; exit 1 # abort: do not launch main.py on a missing dataset 18 | fi 19 | 20 | 21 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then 22 | CTX_DIM=512 23 | else 24 | CTX_DIM=1024 25 | fi 26 | 27 | 28 | if [ "$METHOD" = "zeroshot" ]; then 29 | SEEDS=0 30 | else 31 | SEEDS="0 1 2" 32 | fi 33 | 34 | 35 | 36 | 37 | for SEED in $SEEDS 38 | do 39 | python main.py \ 40 | --model_name "$METHOD" \ 41 | --dataset_root "$DATASET_ROOT" \ 42 | --n_epochs 50 \ 43 | --freq_test_model 10 \ 44 | --ctx_dim "$CTX_DIM" \ 45 | --batch_size 16 \ 46 | --lr 0.05 \ 47 | --seed "$SEED" \ 48 | --exp_name "$DATASET" \ 49 | --num_shots 16 \ 50 | --do_logging 51 | done -------------------------------------------------------------------------------- /scripts/ravdess.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DATASET="RAVDESS" 3 | METHOD=$1 4 | 5 | 6 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then 7 | echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']" 8 | exit 1 9 | fi 10 | 11 | echo "Running METHOD=$METHOD on DATASET=$DATASET" 12 | 13 | DATASET_ROOT="/Audio-Datasets/$DATASET" 14 | 15 | if [ -d "$DATASET_ROOT" ]; then 16 | echo "Dataset path exists: $DATASET_ROOT" 17 | else 18 | echo "Dataset path does not exist.
Please set the correct path to the dataset root directory in variable DATASET_ROOT"; exit 1 # abort: do not launch main.py on a missing dataset 19 | fi 20 | 21 | 22 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then 23 | CTX_DIM=512 24 | else 25 | CTX_DIM=1024 26 | fi 27 | 28 | 29 | if [ "$METHOD" = "zeroshot" ]; then 30 | SEEDS=0 31 | else 32 | SEEDS="0 1 2" 33 | fi 34 | 35 | 36 | 37 | for SEED in $SEEDS 38 | do 39 | python main.py \ 40 | --model_name "$METHOD" \ 41 | --dataset_root "$DATASET_ROOT" \ 42 | --n_epochs 50 \ 43 | --freq_test_model 10 \ 44 | --ctx_dim "$CTX_DIM" \ 45 | --batch_size 16 \ 46 | --lr 0.05 \ 47 | --seed "$SEED" \ 48 | --exp_name "$DATASET" \ 49 | --num_shots 16 \ 50 | --do_logging 51 | done -------------------------------------------------------------------------------- /scripts/vocal_sound.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DATASET="VocalSound" 3 | METHOD=$1 4 | 5 | 6 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then 7 | echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']" 8 | exit 1 9 | fi 10 | 11 | echo "Running METHOD=$METHOD on DATASET=$DATASET" 12 | 13 | DATASET_ROOT="/Audio-Datasets/$DATASET" 14 | 15 | if [ -d "$DATASET_ROOT" ]; then 16 | echo "Dataset path exists: $DATASET_ROOT" 17 | else 18 | echo "Dataset path does not exist.
Please set the correct path to the dataset root directory in variable DATASET_ROOT"; exit 1 # abort: do not launch main.py on a missing dataset 19 | fi 20 | 21 | 22 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then 23 | CTX_DIM=512 24 | else 25 | CTX_DIM=1024 26 | fi 27 | 28 | 29 | if [ "$METHOD" = "zeroshot" ]; then 30 | SEEDS=0 31 | else 32 | SEEDS="0 1 2" 33 | fi 34 | 35 | 36 | 37 | for SEED in $SEEDS 38 | do 39 | python main.py \ 40 | --model_name "$METHOD" \ 41 | --dataset_root "$DATASET_ROOT" \ 42 | --n_epochs 50 \ 43 | --freq_test_model 10 \ 44 | --ctx_dim "$CTX_DIM" \ 45 | --batch_size 16 \ 46 | --lr 0.05 \ 47 | --seed "$SEED" \ 48 | --exp_name "$DATASET" \ 49 | --num_shots 16 \ 50 | --do_logging 51 | done -------------------------------------------------------------------------------- /scripts/gt_music_genre.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DATASET="GT-Music-Genre" 3 | METHOD=$1 4 | 5 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then 6 | echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']" 7 | exit 1 8 | fi 9 | 10 | echo "Running METHOD=$METHOD on DATASET=$DATASET" 11 | 12 | DATASET_ROOT="/Audio-Datasets/$DATASET" 13 | 14 | if [ -d "$DATASET_ROOT" ]; then 15 | echo "Dataset path exists: $DATASET_ROOT" 16 | else 17 | echo "Dataset path does not exist.
Please set the correct path to the dataset root directory in variable DATASET_ROOT"; exit 1 # abort: do not launch main.py on a missing dataset 18 | fi 19 | 20 | 21 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then 22 | CTX_DIM=512 23 | else 24 | CTX_DIM=1024 25 | fi 26 | 27 | 28 | if [ "$METHOD" = "zeroshot" ]; then 29 | SEEDS=0 30 | else 31 | SEEDS="0 1 2" 32 | fi 33 | 34 | 35 | 36 | 37 | for SEED in $SEEDS 38 | do 39 | python main.py \ 40 | --model_name "$METHOD" \ 41 | --dataset_root "$DATASET_ROOT" \ 42 | --n_epochs 50 \ 43 | --freq_test_model 10 \ 44 | --ctx_dim "$CTX_DIM" \ 45 | --batch_size 16 \ 46 | --lr 0.05 \ 47 | --seed "$SEED" \ 48 | --exp_name "$DATASET" \ 49 | --num_shots 16 \ 50 | --do_logging 51 | done -------------------------------------------------------------------------------- /scripts/ns_instruments.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DATASET="NS-Instruments" 3 | METHOD=$1 4 | 5 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then 6 | echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']" 7 | exit 1 8 | fi 9 | 10 | echo "Running METHOD=$METHOD on DATASET=$DATASET" 11 | 12 | DATASET_ROOT="/Audio-Datasets/$DATASET" 13 | 14 | if [ -d "$DATASET_ROOT" ]; then 15 | echo "Dataset path exists: $DATASET_ROOT" 16 | else 17 | echo "Dataset path does not exist.
Please set the correct path to the dataset root directory in variable DATASET_ROOT"; exit 1 # abort: do not launch main.py on a missing dataset 18 | fi 19 | 20 | 21 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then 22 | CTX_DIM=512 23 | else 24 | CTX_DIM=1024 25 | fi 26 | 27 | 28 | if [ "$METHOD" = "zeroshot" ]; then 29 | SEEDS=0 30 | else 31 | SEEDS="0 1 2" 32 | fi 33 | 34 | 35 | 36 | 37 | for SEED in $SEEDS 38 | do 39 | python main.py \ 40 | --model_name "$METHOD" \ 41 | --dataset_root "$DATASET_ROOT" \ 42 | --n_epochs 50 \ 43 | --freq_test_model 10 \ 44 | --ctx_dim "$CTX_DIM" \ 45 | --batch_size 16 \ 46 | --lr 0.05 \ 47 | --seed "$SEED" \ 48 | --exp_name "$DATASET" \ 49 | --num_shots 16 \ 50 | --do_logging 51 | done 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /scripts/esc50.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DATASET="ESC50" 3 | METHOD=$1 4 | 5 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then 6 | echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']" 7 | exit 1 8 | fi 9 | 10 | echo "Running METHOD=$METHOD on DATASET=$DATASET" 11 | 12 | DATASET_ROOT="/Audio-Datasets/$DATASET" 13 | 14 | if [ -d "$DATASET_ROOT" ]; then 15 | echo "Dataset path exists: $DATASET_ROOT" 16 | else 17 | echo "Dataset path does not exist.
Please set the correct path to the dataset root directory in variable DATASET_ROOT"; exit 1 # abort: the fold cp and main.py below need an existing dataset root 18 | fi 19 | 20 | 21 | if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then 22 | CTX_DIM=512 23 | else 24 | CTX_DIM=1024 25 | fi 26 | 27 | 28 | if [ "$METHOD" = "zeroshot" ]; then 29 | SEEDS=0 30 | else 31 | SEEDS="0 1 2" 32 | fi 33 | 34 | 35 | 36 | for FOLD in 1 2 3 4 5 37 | do 38 | for SEED in $SEEDS 39 | do 40 | echo "Running Fold-$FOLD with SEED=$SEED" 41 | if [ -f "$DATASET_ROOT/train.csv" ]; then rm -rf "$DATASET_ROOT/train.csv"; fi 42 | if [ -f "$DATASET_ROOT/test.csv" ]; then rm -rf "$DATASET_ROOT/test.csv"; fi 43 | cp "$DATASET_ROOT/csv_files/train_$FOLD.csv" "$DATASET_ROOT/train.csv" 44 | cp "$DATASET_ROOT/csv_files/test_$FOLD.csv" "$DATASET_ROOT/test.csv" 45 | 46 | python main.py \ 47 | --model_name "$METHOD" \ 48 | --dataset_root "$DATASET_ROOT" \ 49 | --n_epochs 50 \ 50 | --freq_test_model 10 \ 51 | --ctx_dim "$CTX_DIM" \ 52 | --batch_size 16 \ 53 | --lr 0.05 \ 54 | --seed "$SEED" \ 55 | --exp_name "$DATASET-FOLD$FOLD" \ 56 | --num_shots 16 \ 57 | --do_logging 58 | done 59 | done -------------------------------------------------------------------------------- /scripts/tut.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DATASET="TUT2017" 3 | METHOD=$1 4 | 5 | if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then 6 | echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']" 7 | exit 1 8 | fi 9 | 10 | echo "Running METHOD=$METHOD on DATASET=$DATASET" 11 | 12 | DATASET_ROOT="/Audio-Datasets/$DATASET" 13 | 14 | if [ -d "$DATASET_ROOT" ]; then 15 | echo "Dataset path exists: $DATASET_ROOT" 16 | else 17 | echo "Dataset path does not exist.
#!/bin/bash
# Run the 5-fold Beijing-Opera experiments for one method.
# Usage: bash scripts/beijing_opera.sh <zeroshot|coop|cocoop|palm>
DATASET="Beijing-Opera"
METHOD=$1

if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
    echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
    exit 1
fi

echo "Running METHOD=$METHOD on DATASET=$DATASET"

DATASET_ROOT="/Audio-Datasets/$DATASET"

if [ -d "$DATASET_ROOT" ]; then
    echo "Dataset path exists: $DATASET_ROOT"
else
    echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
    # Nothing below can succeed without the dataset, so stop here instead of
    # letting every fold/seed combination fail one by one.
    exit 1
fi


# coop / cocoop use a 512-dim context, every other method uses 1024.
if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
    CTX_DIM=512
else
    CTX_DIM=1024
fi


# zeroshot is run with a single seed; the other methods are run with three seeds.
if [ "$METHOD" = "zeroshot" ]; then
    SEEDS=0
else
    SEEDS="0 1 2"
fi


for FOLD in 1 2 3 4 5
do
    # $SEEDS is intentionally unquoted: it is a space-separated list.
    for SEED in $SEEDS
    do
        echo "Running Fold-$FOLD with SEED=$SEED"
        # Install this fold's split as train.csv / test.csv. `cp -f` overwrites the
        # previous fold's files; abort on failure so a stale split is never used.
        cp -f "$DATASET_ROOT/csv_files/train_$FOLD.csv" "$DATASET_ROOT/train.csv" || exit 1
        cp -f "$DATASET_ROOT/csv_files/test_$FOLD.csv" "$DATASET_ROOT/test.csv" || exit 1

        python main.py \
            --model_name "$METHOD" \
            --dataset_root "$DATASET_ROOT" \
            --n_epochs 50 \
            --freq_test_model 10 \
            --ctx_dim "$CTX_DIM" \
            --batch_size 16 \
            --lr 0.05 \
            --seed "$SEED" \
            --exp_name "$DATASET-FOLD$FOLD" \
            --num_shots 16 \
            --do_logging
    done
done
#!/bin/bash
# Run the 10-fold UrbanSound8K experiments for one method.
# Usage: bash scripts/urban_sound.sh <zeroshot|coop|cocoop|palm>
DATASET="UrbanSound8K"
METHOD=$1


if [ "$METHOD" != "zeroshot" ] && [ "$METHOD" != "coop" ] && [ "$METHOD" != "cocoop" ] && [ "$METHOD" != "palm" ]; then
    echo "Invalid METHOD=$METHOD . Please choose one of the following: ['zeroshot', 'coop', 'cocoop', 'palm']"
    exit 1
fi

echo "Running METHOD=$METHOD on DATASET=$DATASET"

DATASET_ROOT="/Audio-Datasets/$DATASET"

if [ -d "$DATASET_ROOT" ]; then
    echo "Dataset path exists: $DATASET_ROOT"
else
    echo "Dataset path does not exist. Please set the correct path to the dataset root directory in variable DATASET_ROOT"
    # Nothing below can succeed without the dataset, so stop here instead of
    # letting every fold/seed combination fail one by one.
    exit 1
fi


# coop / cocoop use a 512-dim context, every other method uses 1024.
if [ "$METHOD" = "coop" ] || [ "$METHOD" = "cocoop" ]; then
    CTX_DIM=512
else
    CTX_DIM=1024
fi


# zeroshot is run with a single seed; the other methods are run with three seeds.
if [ "$METHOD" = "zeroshot" ]; then
    SEEDS=0
else
    SEEDS="0 1 2"
fi


for FOLD in 1 2 3 4 5 6 7 8 9 10
do
    # $SEEDS is intentionally unquoted: it is a space-separated list.
    for SEED in $SEEDS
    do
        echo "Running Fold-$FOLD with SEED=$SEED"
        # Install this fold's split as train.csv / test.csv. `cp -f` overwrites the
        # previous fold's files; abort on failure so a stale split is never used.
        cp -f "$DATASET_ROOT/csv_files/train_$FOLD.csv" "$DATASET_ROOT/train.csv" || exit 1
        cp -f "$DATASET_ROOT/csv_files/test_$FOLD.csv" "$DATASET_ROOT/test.csv" || exit 1

        python main.py \
            --model_name "$METHOD" \
            --dataset_root "$DATASET_ROOT" \
            --n_epochs 50 \
            --freq_test_model 10 \
            --ctx_dim "$CTX_DIM" \
            --batch_size 16 \
            --lr 0.05 \
            --seed "$SEED" \
            --exp_name "$DATASET-FOLD$FOLD" \
            --num_shots 16 \
            --do_logging
    done
done
18 | 19 | 20 | ################## Dataset Information ################## 21 | FewShot Dataset 22 | 23 | Root: REDACTED/Audio-Datasets/SESA 24 | 25 | Number of Classes: 4 26 | 27 | Number of Shots: 16 28 | 29 | Total Number of Samples: 64 30 | 31 | Classnames: ['casual', 'explosion', 'gunshot', 'siren'] 32 | 33 | Label to Classname: {0: 'casual', 1: 'explosion', 2: 'gunshot', 3: 'siren'} 34 | 35 | Classname to Label: {'casual': 0, 'explosion': 1, 'gunshot': 2, 'siren': 3} 36 | ######################################################## 37 | 38 | 39 | 40 | 41 | ################## Dataset Information ################## 42 | FewShot Dataset 43 | 44 | Root: REDACTED/Audio-Datasets/SESA 45 | 46 | Number of Classes: 4 47 | 48 | Number of Shots: -1 49 | 50 | Total Number of Samples: 105 51 | 52 | Classnames: ['casual', 'explosion', 'gunshot', 'siren'] 53 | 54 | Label to Classname: {0: 'casual', 1: 'explosion', 2: 'gunshot', 3: 'siren'} 55 | 56 | Classname to Label: {'casual': 0, 'explosion': 1, 'gunshot': 2, 'siren': 3} 57 | ######################################################## 58 | 59 | 60 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 
61 | warnings.warn( 62 | 63 | 64 | ################## Zero-Shot PENGI Information ################## 65 | Prompt Prefix: The is a recording of 66 | Prompts: ['The is a recording of casual.', 'The is a recording of explosion.', 'The is a recording of gunshot.', 'The is a recording of siren.'] 67 | ################################################################### 68 | 69 | 70 | args: Namespace(batch_size=16, classnames=['casual', 'explosion', 'gunshot', 'siren'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/SESA', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='SESA', freq_test_model=10, json_file_path='logs/zeroshot/SESA.json', log_dir='logs/zeroshot', lr=0.05, model_name='zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, save_model_path='REDACTED/Audio-Datasets/SESA', seed=0, spec_aug=False, start_epoch=0) 71 | 72 | 73 | Evaluating the model ... 74 | 75 | 0%| | 0/1 [00:00 0 Hours : 0 Minutes : 6 Seconds 83 | 84 | 85 | Accuracy = 0.7238 86 | F1-Score = 0.6827 87 | Precision = 0.6941 88 | Recall = 0.7508 89 | Average Loss = 1.3722 90 | 91 | 92 | Saving Results ... 
class ZeroShotPENGI(torch.nn.Module):
    """Zero-shot classifier built from PENGI's pre-trained audio and text encoders.

    One text prompt is created per class name (``"<prompt_prefix> <class_name>."``).
    ``forward`` L2-normalizes the audio and prompt features and returns their
    scaled dot products as logits.
    """

    def __init__(self, args, pengi):
        """Build both encoders, copy the PENGI weights into them and prepare the prompts.

        Args:
            args: run configuration; reads ``spec_aug``, ``device``,
                ``prompt_prefix`` and ``classnames``.
            pengi: PENGI wrapper; reads ``args``, ``preprocess_text`` and
                ``model.audio_encoder`` / ``model.caption_encoder``.
        """
        super().__init__()

        cfg = pengi.args
        # NOTE: this writes into the args object owned by `pengi`.
        cfg.specaug = args.spec_aug

        self.args = args
        self.pengi_args = cfg
        self.process_text = pengi.preprocess_text
        self.device = args.device

        self.audio_encoder = AudioEncoder(
            cfg.audioenc_name, cfg.out_emb, cfg.d_proj,
            cfg.sampling_rate, cfg.window_size, cfg.hop_size, cfg.mel_bins, cfg.fmin, cfg.fmax, cfg.classes_num,
            cfg.specaug, cfg.mixup, cfg.use_pretrained_audioencoder, cfg.freeze_audio_encoder_weights,
            cfg.use_precomputed_melspec, cfg.pretrained_audioencoder_path)

        self.text_encoder = TextEncoder(
            cfg.d_proj,
            cfg.text_model, cfg.transformer_embed_dim,
            cfg.freeze_text_encoder_weights)

        # load the weights of the pengi pre-trained audio and text encoders
        print("ZERO SHOT: loading the weights of the pengi pre-trained audio and text encoders ...")
        pretrained = pengi.model
        self.audio_encoder.load_state_dict(pretrained.audio_encoder.state_dict())
        self.text_encoder.load_state_dict(pretrained.caption_encoder.state_dict())

        # Both encoders are only used for inference here.
        for encoder in (self.audio_encoder, self.text_encoder):
            encoder.eval()

        prefix = args.prompt_prefix
        self.prompts = []
        for class_name in args.classnames:
            self.prompts.append(f"{prefix} {class_name}.")

        print("\n\n################## Zero-Shot PENGI Information ##################")
        print("Prompt Prefix: ", prefix)
        print("Prompts: ", self.prompts)
        print("###################################################################\n\n")

    def forward(self, audio):
        """Return the logits of ``audio`` against every class prompt.

        The result has one row per audio item and one column per prompt.
        """
        audio_emb = self.audio_encoder(audio)[0]  # audio_features shape [n_audio_files, 1024]
        audio_emb = audio_emb / audio_emb.norm(dim=-1, keepdim=True)

        tokens = self.process_text(self.prompts, enc_tok=True, add_text=False)
        input_ids = tokens['input_ids'].to(self.device)
        # [batch_size, seq_length, embed_dim]
        token_embeds = self.text_encoder.base.embeddings.token_embedding(input_ids).to(self.device)
        attention_mask = tokens['attention_mask'].to(self.device)

        text_emb = self.text_encoder({
            "input_ids": input_ids,
            "inputs_embeds": token_embeds,
            "attention_mask": attention_mask,
        })  # text_features shape [n_text_prompts, 1024]
        text_emb = text_emb / text_emb.norm(dim=-1, keepdim=True)

        logit_scale = 100.0
        # logits shape [n_audio_files, n_text_prompts]
        return logit_scale * audio_emb @ text_emb.t()
# to solve the issue of : the current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks
os.environ["TOKENIZERS_PARALLELISM"] = "false"


def main(args):
    """Run one experiment: build the data loaders and model, then evaluate or train.

    Args:
        args: namespace produced by ``get_args()``. Reads ``model_name``,
            ``dataset_root``, ``seed``, ``eval_only``, ``do_logging``,
            ``json_file_path``, ``lr`` and ``n_epochs``; sets ``device``,
            ``process_audio_fn`` and ``classnames`` on it for downstream code.
    """
    # normpath drops a trailing separator first, so ".../ESC50/" still prints
    # "ESC50" (a plain split('/')[-1] would print an empty name).
    dataset_name = os.path.basename(os.path.normpath(args.dataset_root))

    print(f"\n\n{'Model:':<10}{args.model_name.upper()}")
    print(f"{'Dataset:':<10}{dataset_name}")
    print(f"{'Seed:':<10}{args.seed}\n\n")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device

    args.process_audio_fn = pengi.preprocess_audio

    # to ensure reproducibility
    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    train_dataloader, test_dataloader = get_dataloaders(args)
    args.classnames = train_dataloader.dataset.classnames
    assert train_dataloader.dataset.classnames == test_dataloader.dataset.classnames, "Classnames in train and test datasets are different."

    model = get_model(args, pengi, palm)
    model.to(device)

    criterion = nn.CrossEntropyLoss()

    print("\nArguments:\n")
    for arg in vars(args):
        print(f"{arg:<25}: {getattr(args, arg)}")
    print("\n\n")

    if args.eval_only:
        # The zero-shot model has no trained weights to restore.
        if args.model_name != "zeroshot":
            load_model(args, model)
        test_loss, actual_labels, predicted_labels = trainer.run_evaluation(model, test_dataloader, criterion, device)
        accuracy, f1_score, precision, recall = get_scores(actual_labels, predicted_labels, args.classnames)
        print(f"\n\n-------------------------------\nTest Evaluation\n-------------------------------\n")
        print_scores(accuracy, f1_score, precision, recall, test_loss)
        if args.do_logging:
            print("Saving Results ...")
            # -1 is passed in the epoch position for evaluation-only runs.
            save_scores(args.seed, -1, accuracy, f1_score, precision, recall, test_loss, args.json_file_path)
            print("Results Saved\n\n")
    else:
        # Only the prompt learner's parameters are handed to the optimizer.
        optimizer = torch.optim.SGD(model.prompt_learner.parameters(), lr=args.lr, momentum=0.9)
        trainer.run_training(model, train_dataloader, test_dataloader, optimizer, criterion, device, epochs=args.n_epochs, args=args)


if __name__ == "__main__":

    args = get_args()
    log_file = setup_logging(args)

    print("\n\n##############################################")
    print("PALM: Prompt Learning in Audio Language Models")
    print("##############################################\n\n")
    date_now = datetime.datetime.now(pytz.timezone('Asia/Dubai'))
    print(f'Time & Date = {date_now.strftime("%I:%M %p")} , {date_now.strftime("%d_%b_%Y")} GST\n')

    main(args)
# Function to load JSON data from a file
def load_json(filepath):
    """Return the parsed content of the JSON file at ``filepath``."""
    with open(filepath, 'r') as f:
        return json.load(f)


def check_seed_existence(results, seeds=None):
    """Return the seeds that have a ``seed_<n>`` entry in ``results``.

    Args:
        results (dict): content of one dataset's results JSON.
        seeds (iterable | None): seeds to look for. Defaults to the
            module-level ``SEEDS`` set by the script entry point.
    """
    if seeds is None:
        seeds = SEEDS
    return [seed for seed in seeds if f'seed_{seed}' in results]


# Function to get results for all seeds of a dataset and method
def get_dataset_results(dataset, folder=None, seeds=None):
    """Load ``<folder>/<dataset>.json`` and check that every seed is present.

    Args:
        dataset (str): dataset name (file stem of the results JSON).
        folder (str | None): directory holding the JSON files. Defaults to
            the module-level ``results_folder``.
        seeds (iterable | None): required seeds. Defaults to the module-level
            ``SEEDS``.

    Returns:
        dict: the parsed results.

    Raises:
        ValueError: if the file is missing or lacks one of the seeds.
    """
    if folder is None:
        folder = results_folder
    if seeds is None:
        seeds = SEEDS

    json_path = f"{os.path.join(folder, dataset)}.json"

    if not os.path.exists(json_path):
        raise ValueError(f"File {json_path} does not exist. Get results for Dataset='{dataset}'.")

    results = load_json(json_path)
    seeds_exist = check_seed_existence(results, seeds)
    if len(seeds_exist) != len(seeds):
        raise ValueError(f"Seeds {set(seeds)-set(seeds_exist)} not found in {json_path} file. Get results for all seeds first in '{json_path}'.")

    return results


def get_results(datasets=None, folder=None, seeds=None):
    """Return ``{dataset: results}`` for every dataset.

    All arguments are optional and default to the module-level ``DATASETS``,
    ``results_folder`` and ``SEEDS`` respectively.
    """
    if datasets is None:
        datasets = DATASETS
    return {dataset: get_dataset_results(dataset, folder, seeds) for dataset in datasets}


if __name__ == "__main__":

    # Datasets
    DATASETS = [
        'Beijing-Opera',
        'CREMA-D',
        'ESC50-Actions',
        'ESC50',
        'GT-Music-Genre',
        'NS-Instruments',
        'RAVDESS',
        'SESA',
        'TUT2017',
        'UrbanSound8K',
        'VocalSound',
    ]

    methods = ['zeroshot', 'coop', 'cocoop', 'palm']

    for method in methods:
        # Folder containing the JSON files
        results_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), method)

        # zeroshot has results for a single seed only.
        if method == 'zeroshot':
            SEEDS = [0]
        else:
            SEEDS = [0, 1, 2]

        results = get_results(DATASETS, results_folder, SEEDS)

        accuracy_dict = {}
        f1_score_dict = {}

        for dataset in DATASETS:

            accuracy_sub_list = [results[dataset][f'seed_{seed}']['accuracy'] for seed in SEEDS]
            f1_score_sub_list = [results[dataset][f'seed_{seed}']['f1_score'] for seed in SEEDS]

            # With several seeds, the mean over seeds is appended as the last entry.
            if len(accuracy_sub_list) > 1:
                accuracy_sub_list.append(np.mean(accuracy_sub_list))
                f1_score_sub_list.append(np.mean(f1_score_sub_list))

            accuracy_dict[dataset] = accuracy_sub_list
            f1_score_dict[dataset] = f1_score_sub_list

        with open(os.path.join(results_folder, 'accuracy.json'), 'w') as f:
            json.dump(accuracy_dict, f, indent=2)
        print(f"Accuracy results saved in {os.path.join(results_folder,'accuracy.json')} file.")

        with open(os.path.join(results_folder, 'f1_score.json'), 'w') as f:
            json.dump(f1_score_dict, f, indent=2)
        print(f"F1-score results saved in {os.path.join(results_folder,'f1_score.json')}.")

    print("\n\nResults saved successfully.\n\n")
device): 41 | model.eval() 42 | 43 | losses = [] 44 | actual_labels = [] 45 | predicted_labels = [] 46 | 47 | print("\n\nEvaluating the model ...") 48 | with torch.no_grad(): 49 | for i, (audio, label) in enumerate(dataloader): 50 | # for i, (audio, label) in tqdm(enumerate(dataloader), total=len(dataloader)): 51 | print(f"Batch {i+1}/{len(dataloader)}") 52 | 53 | audio = audio.to(device).squeeze(1) 54 | label = label.to(device) 55 | 56 | logits = model(audio) 57 | loss = criterion(logits, label) 58 | 59 | losses.append(loss.item()) 60 | 61 | actual_labels.extend(label.cpu().numpy()) 62 | predicted_labels.extend(logits.argmax(axis=1).cpu().numpy()) 63 | 64 | avg_loss = sum(losses) / len(losses) 65 | 66 | return avg_loss, actual_labels, predicted_labels 67 | 68 | 69 | @timeit 70 | def run_training(model, train_dataloader, test_dataloader, optimizer, criterion, device, epochs=50, args=None): 71 | 72 | for epoch in tqdm(range(epochs), total=epochs): 73 | 74 | train_loss, actual_labels, predicted_labels = run_epoch(model, train_dataloader, optimizer, criterion, device, args=args) 75 | 76 | if (epoch+1)%5 == 0: 77 | accuracy, f1_score, precision, recall = get_scores(actual_labels, predicted_labels, args.classnames) 78 | print(f"\n\n-------------------------------\nTrain Evaluation (Epoch {epoch + 1}/{epochs})\n-------------------------------\n") 79 | print_scores(accuracy, f1_score, precision, recall, train_loss) 80 | 81 | 82 | if (epoch+1)%args.freq_test_model == 0: 83 | test_loss, actual_labels, predicted_labels = run_evaluation(model, test_dataloader, criterion, device) 84 | accuracy, f1_score, precision, recall = get_scores(actual_labels, predicted_labels, args.classnames) 85 | print(f"\n\n-------------------------------\nTest Evaluation\n-------------------------------\n") 86 | print_scores(accuracy, f1_score, precision, recall, test_loss) 87 | 88 | if (epoch == epochs-1) and args.do_logging: 89 | print("\n\nFinal Evaluation") 90 | print("Saving Results ...") 91 | 
class FewShotDataset(Dataset):
    """Audio classification dataset read from ``<root>/<split>.csv``, optionally sub-sampled to k shots per class.

    The CSV must provide a ``classname`` column and a ``path`` column
    (audio path relative to ``root``). Labels are the indices of the
    alphabetically sorted class names.
    """

    def __init__(self, root, split=None, num_shots=-1, repeat=False, process_audio_fn=None, resample=True):
        """
        Args:
            root (str): path to the dataset.
            split (str): name of the CSV file (without extension) inside ``root``, e.g. 'train' or 'test'.
            num_shots (int): number of shots per class; -1 keeps every sample.
            repeat (bool): repeat samples if needed (default: False).
            process_audio_fn (function): function to process audio samples; called as ``process_audio_fn([path], resample)``.
            resample (bool): resample audio samples (default: True).

        """

        assert split is not None, "'split' cannot be None. Choose from ['train', 'test']"

        self.root = root
        self.split = split
        self.num_shots = num_shots
        self.repeat = repeat
        self.resample = resample

        df = pd.read_csv(os.path.join(root, f"{split}.csv"))

        # Sorted so the label assignment does not depend on the CSV row order.
        self.classnames = sorted(df['classname'].unique().tolist())
        self.label2classname = dict(enumerate(self.classnames))
        self.classname2label = {classname: i for i, classname in enumerate(self.classnames)}

        self.data = self.generate_fewshot_dataset(df, num_shots=num_shots, repeat=repeat)

        self.process_audio_fn = process_audio_fn

        print("\n\n################## Dataset Information ##################")
        if num_shots>0: print("FewShot Dataset")
        print(f"{'Root':<25} : {root}")
        print(f"{'Split':<25} : {split}")
        print(f"{'Number of Classes':<25} : {len(self.classnames)}")
        print(f"{'Number of Shots':<25} : {num_shots}")
        print(f"{'Total Number of Samples':<25} : {len(self.data)}")
        print(f"{'Classnames':<25} : {self.classnames}")
        print(f"{'Label to Classname':<25} : {self.label2classname}")
        print(f"{'Classname to Label':<25} : {self.classname2label}")
        print("########################################################\n\n")

    def generate_fewshot_dataset(self, df, num_shots=-1, repeat=False):
        """
        Generate a few-shot dataset.
        Args:
            df (pd.DataFrame): dataframe containing the dataset.
            num_shots (int): number of shots per class; -1 returns ``df`` unchanged.
            repeat (bool): repeat samples if needed. When a class has fewer than
                ``num_shots`` rows it is sampled with replacement if True,
                otherwise all of its rows are kept.
        Returns:
            pd.DataFrame: the selected rows, grouped by class in ``self.classnames``
            order, with a fresh 0..n-1 index.
        """

        if num_shots == -1:
            return df

        print(f"Creating a {num_shots}-shot dataset ...")

        # Collect the per-class selections and concatenate once: this avoids
        # re-copying the growing frame on every class and avoids concatenating
        # onto an empty frame (whose object dtypes can leak into the result).
        per_class = []
        for classname in self.classnames:
            df_class = df[df['classname'] == classname]

            if len(df_class) >= num_shots:
                per_class.append(df_class.sample(num_shots))
            elif repeat:
                per_class.append(df_class.sample(num_shots, replace=True))
            else:
                per_class.append(df_class)

        if not per_class:
            # No classes at all: return an empty frame with the original columns.
            return df.iloc[0:0].reset_index(drop=True)

        return pd.concat(per_class, ignore_index=True)

    def __len__(self):
        """Return the number of selected samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(audio, label)`` for the sample at position ``idx``."""
        row = self.data.iloc[idx]
        audio_path = os.path.join(self.root, row['path'])
        audio = self.process_audio_fn([audio_path], self.resample)  # [1,n_samples]
        label = self.classname2label[row['classname']]
        return audio, label
os.path.join(os.path.dirname(os.path.abspath(__file__)), method) 36 | 37 | 38 | accuracy_dict = json.load(open(os.path.join(results_folder, 'accuracy.json'))) 39 | f1_score_dict = json.load(open(os.path.join(results_folder, 'f1_score.json'))) 40 | 41 | 42 | for dataset in DATASETS: 43 | accuracy_dict_all[dataset].extend(accuracy_dict[dataset]) 44 | f1_score_dict_all[dataset].extend(f1_score_dict[dataset]) 45 | 46 | 47 | 48 | 49 | # average accuracy and F1-score across all datasets 50 | for dataset in DATASETS: 51 | accuracy_all.append([accuracy for accuracy in accuracy_dict_all[dataset]]) 52 | f1_score_all.append([f1_score for f1_score in f1_score_dict_all[dataset]]) 53 | 54 | accuracy_all = np.array(accuracy_all) 55 | f1_score_all = np.array(f1_score_all) 56 | 57 | avg_accuracy_all = accuracy_all.mean(axis=0) 58 | avg_f1_score_all = f1_score_all.mean(axis=0) 59 | 60 | 61 | 62 | # print latex table 63 | string_acc = '' 64 | string_f1 = '' 65 | for dataset in DATASETS: 66 | string_acc = string_acc + f'{dataset} & ' + ' & '.join([f'{accuracy:0.4f}' for accuracy in accuracy_dict_all[dataset]]) + ' \\\\\n' 67 | string_f1 = string_f1 + f'{dataset} & ' + ' & '.join([f'{f1_score:0.4f}' for f1_score in f1_score_dict_all[dataset]]) + ' \\\\\n' 68 | 69 | 70 | string_acc = string_acc + f'\midrule\nAVERAGE & ' + ' & '.join([f'{accuracy:0.4f}' for accuracy in avg_accuracy_all]) + ' \\\\\n' 71 | string_f1 = string_f1 + f'\midrule\nAVERAGE & ' + ' & '.join([f'{f1_score:0.4f}' for f1_score in avg_f1_score_all]) + ' \\\\\n' 72 | 73 | 74 | top_row = f"DATASETS ↓ & ZERO SHOT & " 75 | for method in methods[1:]: 76 | for seed in range(3): top_row = top_row + f"{method.upper()}-SEED{seed} & " 77 | top_row = top_row + f"{method.upper()}-AVG & " 78 | top_row = top_row[:-2] + ' \\\\' 79 | 80 | print("\n\n########## ACCURACY (LaTeX Table) ##########") 81 | results_acc = top_row+"\n"+string_acc 82 | print(results_acc) 83 | 84 | print('\n\n') 85 | print("\n\n########## F1-SCORE (LaTeX Table) 
class PromptLearner(nn.Module):
    """Learnable per-class context vectors blended into the class text features.

    Holds one context vector (``ctx``) and one mixing logit (``lambdas``) per class.
    """

    def __init__(self, args):
        """Create the parameters from ``args.classnames`` and ``args.ctx_dim``."""
        super().__init__()

        self.args = args
        num_classes = len(args.classnames)

        print("Initializing a generic context")
        # One context vector per class, drawn from N(0, 0.02^2).
        initial_ctx = torch.nn.init.normal_(torch.empty(num_classes, args.ctx_dim), std=0.02)
        self.ctx = torch.nn.Parameter(initial_ctx)

        self.n_cls = num_classes
        # Raw mixing weights; squashed to (0, 1) with a sigmoid in forward().
        self.lambdas = nn.Parameter(torch.rand(num_classes))

    def forward(self, audio_features, text_features):
        """Return the L2-normalized mix of ``text_features`` and the learned context.

        ``audio_features`` is accepted for interface compatibility but not used.
        """
        mix = torch.sigmoid(self.lambdas).reshape(-1, 1)  # [n_cls, 1]

        blended = (1 - mix) * text_features + (mix * self.ctx)  # [n_text_prompts, 1024]
        return blended / blended.norm(dim=-1, keepdim=True)


class CustomPENGI(nn.Module):
    """PENGI audio/text encoders combined with a trainable ``PromptLearner``."""

    def __init__(self, args, pengi):
        """Build both encoders, copy the PENGI weights into them and create the prompt learner.

        Args:
            args: run configuration; reads ``device`` and ``classnames``
                (plus what ``PromptLearner`` reads).
            pengi: PENGI wrapper; reads ``args``, ``preprocess_text`` and
                ``model.audio_encoder`` / ``model.caption_encoder``.
        """
        super().__init__()

        cfg = pengi.args
        self.args = args
        self.pengi_args = cfg

        self.audio_encoder = AudioEncoder(
            cfg.audioenc_name, cfg.out_emb, cfg.d_proj,
            cfg.sampling_rate, cfg.window_size, cfg.hop_size, cfg.mel_bins, cfg.fmin, cfg.fmax, cfg.classes_num,
            cfg.specaug, cfg.mixup, cfg.use_pretrained_audioencoder, cfg.freeze_audio_encoder_weights,
            cfg.use_precomputed_melspec, cfg.pretrained_audioencoder_path)

        self.text_encoder = TextEncoder(
            cfg.d_proj,
            cfg.text_model, cfg.transformer_embed_dim,
            cfg.freeze_text_encoder_weights)

        # load the weights of the pengi pre-trained audio and text encoders
        print("\n\nPALM: loading the weights of the pengi pre-trained audio and text encoders ...\n\n")
        pretrained = pengi.model
        self.audio_encoder.load_state_dict(pretrained.audio_encoder.state_dict())
        self.text_encoder.load_state_dict(pretrained.caption_encoder.state_dict())

        # Both encoders are put in eval mode.
        for encoder in (self.audio_encoder, self.text_encoder):
            encoder.eval()

        self.prompt_learner = PromptLearner(args)

        self.process_text = pengi.preprocess_text
        self.device = args.device

    def forward(self, audio):
        """Return the logits of ``audio`` against the prompt-adjusted class text features."""
        audio_emb = self.audio_encoder(audio)[0]  # audio_features shape [n_audio_files, 1024]
        audio_emb = audio_emb / audio_emb.norm(dim=-1, keepdim=True)

        # The bare class names are used as the text prompts.
        class_prompts = [f"{class_name}" for class_name in self.args.classnames]
        tokens = self.process_text(class_prompts, enc_tok=True, add_text=False)
        input_ids = tokens['input_ids'].to(self.device)
        attention_mask = tokens['attention_mask'].to(self.device)

        # The token-embedding lookup is excluded from autograd.
        with torch.no_grad():
            token_embeds = self.text_encoder.base.embeddings.token_embedding(input_ids)  # [batch_size, seq_length, embed_dim]

        text_emb = self.text_encoder({
            "input_ids": input_ids,
            "inputs_embeds": token_embeds,
            "attention_mask": attention_mask,
        })  # text_features shape [n_text_prompts, 1024]
        text_emb = text_emb / text_emb.norm(dim=-1, keepdim=True)

        text_emb = self.prompt_learner(audio_emb, text_emb)  # text_features shape [n_text_prompts, 1024]

        logit_scale = 100.0
        # logits shape [n_audio_files, n_text_prompts]
        return logit_scale * audio_emb @ text_emb.t()
16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/ESC50-Actions 22 | 23 | Number of Classes: 10 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 160 28 | 29 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'] 30 | 31 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'} 32 | 33 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/ESC50-Actions 43 | 44 | Number of Classes: 10 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 80 49 | 50 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'] 51 | 52 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'} 53 | 54 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`. 59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of breathing.', 'The is a recording of brushing teeth.', 'The is a recording of clapping.', 'The is a recording of coughing.', 'The is a recording of crying baby.', 'The is a recording of drinking sipping.', 'The is a recording of footsteps.', 'The is a recording of laughing.', 'The is a recording of sneezing.', 'The is a recording of snoring.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/ESC50-Actions', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='ESC50-Actions-FOLD1', freq_test_model=10, json_file_path='logs/pengi_zeroshot/ESC50-Actions-FOLD1.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/ESC50-Actions', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model ... 72 | 73 | 0%| | 0/1 [00:00 0 Hours : 0 Minutes : 1 Seconds 81 | 82 | 83 | Accuracy = 0.7000 84 | F1-Score = 0.6518 85 | Precision = 0.7361 86 | Recall = 0.7000 87 | Average Loss = 1.0603 88 | 89 | 90 | Saving Results ... 
91 | Results Saved 92 | 93 | 94 | -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-Actions-FOLD2-SEED0.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############################################## 4 | PALM: Prompt Learning in Audio Language Models 5 | ############################################## 6 | 7 | 8 | 9 | 10 | Model: PENGI_ZEROSHOT 11 | Dataset: ESC50-Actions 12 | Seed: 0 13 | 14 | 15 | Creating a 16-shot dataset ... 16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/ESC50-Actions 22 | 23 | Number of Classes: 10 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 160 28 | 29 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'] 30 | 31 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'} 32 | 33 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/ESC50-Actions 43 | 44 | Number of Classes: 10 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 80 49 | 50 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'] 51 | 52 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 
'sneezing', 9: 'snoring'} 53 | 54 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of breathing.', 'The is a recording of brushing teeth.', 'The is a recording of clapping.', 'The is a recording of coughing.', 'The is a recording of crying baby.', 'The is a recording of drinking sipping.', 'The is a recording of footsteps.', 'The is a recording of laughing.', 'The is a recording of sneezing.', 'The is a recording of snoring.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/ESC50-Actions', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='ESC50-Actions-FOLD2', freq_test_model=10, json_file_path='logs/pengi_zeroshot/ESC50-Actions-FOLD2.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/ESC50-Actions', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model ... 
72 | 73 | 0%| | 0/1 [00:00 0 Hours : 0 Minutes : 1 Seconds 81 | 82 | 83 | Accuracy = 0.6375 84 | F1-Score = 0.5951 85 | Precision = 0.7354 86 | Recall = 0.6375 87 | Average Loss = 1.1233 88 | 89 | 90 | Saving Results ... 91 | Results Saved 92 | 93 | 94 | -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-Actions-FOLD3-SEED0.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############################################## 4 | PALM: Prompt Learning in Audio Language Models 5 | ############################################## 6 | 7 | 8 | 9 | 10 | Model: PENGI_ZEROSHOT 11 | Dataset: ESC50-Actions 12 | Seed: 0 13 | 14 | 15 | Creating a 16-shot dataset ... 16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/ESC50-Actions 22 | 23 | Number of Classes: 10 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 160 28 | 29 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'] 30 | 31 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'} 32 | 33 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/ESC50-Actions 43 | 44 | Number of Classes: 10 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 80 49 | 50 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 
'footsteps', 'laughing', 'sneezing', 'snoring'] 51 | 52 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'} 53 | 54 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of breathing.', 'The is a recording of brushing teeth.', 'The is a recording of clapping.', 'The is a recording of coughing.', 'The is a recording of crying baby.', 'The is a recording of drinking sipping.', 'The is a recording of footsteps.', 'The is a recording of laughing.', 'The is a recording of sneezing.', 'The is a recording of snoring.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/ESC50-Actions', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='ESC50-Actions-FOLD3', freq_test_model=10, json_file_path='logs/pengi_zeroshot/ESC50-Actions-FOLD3.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The 
is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/ESC50-Actions', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model ... 72 | 73 | 0%| | 0/1 [00:00 0 Hours : 0 Minutes : 1 Seconds 81 | 82 | 83 | Accuracy = 0.6750 84 | F1-Score = 0.6251 85 | Precision = 0.6599 86 | Recall = 0.6750 87 | Average Loss = 0.8852 88 | 89 | 90 | Saving Results ... 91 | Results Saved 92 | 93 | 94 | -------------------------------------------------------------------------------- /logs/zeroshot/ESC50-Actions-FOLD5-SEED0.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############################################## 4 | PALM: Prompt Learning in Audio Language Models 5 | ############################################## 6 | 7 | 8 | 9 | 10 | Model: PENGI_ZEROSHOT 11 | Dataset: ESC50-Actions 12 | Seed: 0 13 | 14 | 15 | Creating a 16-shot dataset ... 16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/ESC50-Actions 22 | 23 | Number of Classes: 10 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 160 28 | 29 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'] 30 | 31 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'} 32 | 33 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/ESC50-Actions 43 | 44 | Number of 
Classes: 10 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 80 49 | 50 | Classnames: ['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'] 51 | 52 | Label to Classname: {0: 'breathing', 1: 'brushing teeth', 2: 'clapping', 3: 'coughing', 4: 'crying baby', 5: 'drinking sipping', 6: 'footsteps', 7: 'laughing', 8: 'sneezing', 9: 'snoring'} 53 | 54 | Classname to Label: {'breathing': 0, 'brushing teeth': 1, 'clapping': 2, 'coughing': 3, 'crying baby': 4, 'drinking sipping': 5, 'footsteps': 6, 'laughing': 7, 'sneezing': 8, 'snoring': 9} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of breathing.', 'The is a recording of brushing teeth.', 'The is a recording of clapping.', 'The is a recording of coughing.', 'The is a recording of crying baby.', 'The is a recording of drinking sipping.', 'The is a recording of footsteps.', 'The is a recording of laughing.', 'The is a recording of sneezing.', 'The is a recording of snoring.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['breathing', 'brushing teeth', 'clapping', 'coughing', 'crying baby', 'drinking sipping', 'footsteps', 'laughing', 'sneezing', 'snoring'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/ESC50-Actions', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='ESC50-Actions-FOLD5', freq_test_model=10, 
json_file_path='logs/pengi_zeroshot/ESC50-Actions-FOLD5.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/ESC50-Actions', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model ... 72 | 73 | 0%| | 0/1 [00:00 0 Hours : 0 Minutes : 1 Seconds 81 | 82 | 83 | Accuracy = 0.6375 84 | F1-Score = 0.6191 85 | Precision = 0.6751 86 | Recall = 0.6375 87 | Average Loss = 1.0817 88 | 89 | 90 | Saving Results ... 91 | Results Saved 92 | 93 | 94 | -------------------------------------------------------------------------------- /logs/zeroshot/Beijing-Opera-FOLD1-SEED0.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############################################## 4 | PALM: Prompt Learning in Audio Language Models 5 | ############################################## 6 | 7 | 8 | 9 | 10 | Model: PENGI_ZEROSHOT 11 | Dataset: Beijing-Opera 12 | Seed: 0 13 | 14 | 15 | Creating a 16-shot dataset ... 
16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/Beijing-Opera 22 | 23 | Number of Classes: 4 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 64 28 | 29 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo'] 30 | 31 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'} 32 | 33 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/Beijing-Opera 43 | 44 | Number of Classes: 4 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 48 49 | 50 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo'] 51 | 52 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'} 53 | 54 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 
59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of bangu.', 'The is a recording of daluo.', 'The is a recording of naobo.', 'The is a recording of xiaoluo.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['bangu', 'daluo', 'naobo', 'xiaoluo'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/Beijing-Opera', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='Beijing-Opera-FOLD1', freq_test_model=10, json_file_path='logs/pengi_zeroshot/Beijing-Opera-FOLD1.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/Beijing-Opera', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model ... 72 | 73 | 0%| | 0/1 [00:00 0 Hours : 0 Minutes : 1 Seconds 81 | 82 | 83 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 84 | _warn_prf(average, modifier, msg_start, len(result)) 85 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 86 | _warn_prf(average, modifier, msg_start, len(result)) 87 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. 
Use `zero_division` parameter to control this behavior. 88 | _warn_prf(average, modifier, msg_start, len(result)) 89 | Accuracy = 0.2917 90 | F1-Score = 0.1468 91 | Precision = 0.3191 92 | Recall = 0.2708 93 | Average Loss = 4.0296 94 | 95 | 96 | Saving Results ... 97 | Results Saved 98 | 99 | 100 | -------------------------------------------------------------------------------- /logs/zeroshot/Beijing-Opera-FOLD2-SEED0.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############################################## 4 | PALM: Prompt Learning in Audio Language Models 5 | ############################################## 6 | 7 | 8 | 9 | 10 | Model: PENGI_ZEROSHOT 11 | Dataset: Beijing-Opera 12 | Seed: 0 13 | 14 | 15 | Creating a 16-shot dataset ... 16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/Beijing-Opera 22 | 23 | Number of Classes: 4 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 64 28 | 29 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo'] 30 | 31 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'} 32 | 33 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/Beijing-Opera 43 | 44 | Number of Classes: 4 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 47 49 | 50 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo'] 51 | 52 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'} 53 | 54 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: 
`resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of bangu.', 'The is a recording of daluo.', 'The is a recording of naobo.', 'The is a recording of xiaoluo.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['bangu', 'daluo', 'naobo', 'xiaoluo'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/Beijing-Opera', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='Beijing-Opera-FOLD2', freq_test_model=10, json_file_path='logs/pengi_zeroshot/Beijing-Opera-FOLD2.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/Beijing-Opera', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model ... 72 | 73 | 0%| | 0/1 [00:00 0 Hours : 0 Minutes : 1 Seconds 81 | 82 | 83 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 84 | _warn_prf(average, modifier, msg_start, len(result)) 85 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 
86 | _warn_prf(average, modifier, msg_start, len(result)) 87 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 88 | _warn_prf(average, modifier, msg_start, len(result)) 89 | Accuracy = 0.2766 90 | F1-Score = 0.1083 91 | Precision = 0.0691 92 | Recall = 0.2500 93 | Average Loss = 3.7753 94 | 95 | 96 | Saving Results ... 97 | Results Saved 98 | 99 | 100 | -------------------------------------------------------------------------------- /logs/zeroshot/Beijing-Opera-FOLD3-SEED0.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############################################## 4 | PALM: Prompt Learning in Audio Language Models 5 | ############################################## 6 | 7 | 8 | 9 | 10 | Model: PENGI_ZEROSHOT 11 | Dataset: Beijing-Opera 12 | Seed: 0 13 | 14 | 15 | Creating a 16-shot dataset ... 
16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/Beijing-Opera 22 | 23 | Number of Classes: 4 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 64 28 | 29 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo'] 30 | 31 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'} 32 | 33 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/Beijing-Opera 43 | 44 | Number of Classes: 4 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 47 49 | 50 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo'] 51 | 52 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'} 53 | 54 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 
59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of bangu.', 'The is a recording of daluo.', 'The is a recording of naobo.', 'The is a recording of xiaoluo.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['bangu', 'daluo', 'naobo', 'xiaoluo'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/Beijing-Opera', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='Beijing-Opera-FOLD3', freq_test_model=10, json_file_path='logs/pengi_zeroshot/Beijing-Opera-FOLD3.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/Beijing-Opera', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model ... 72 | 73 | 0%| | 0/1 [00:00 0 Hours : 0 Minutes : 1 Seconds 81 | 82 | 83 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 84 | _warn_prf(average, modifier, msg_start, len(result)) 85 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 86 | _warn_prf(average, modifier, msg_start, len(result)) 87 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. 
Use `zero_division` parameter to control this behavior. 88 | _warn_prf(average, modifier, msg_start, len(result)) 89 | Accuracy = 0.2766 90 | F1-Score = 0.1083 91 | Precision = 0.0691 92 | Recall = 0.2500 93 | Average Loss = 3.8906 94 | 95 | 96 | Saving Results ... 97 | Results Saved 98 | 99 | 100 | -------------------------------------------------------------------------------- /logs/zeroshot/Beijing-Opera-FOLD4-SEED0.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############################################## 4 | PALM: Prompt Learning in Audio Language Models 5 | ############################################## 6 | 7 | 8 | 9 | 10 | Model: PENGI_ZEROSHOT 11 | Dataset: Beijing-Opera 12 | Seed: 0 13 | 14 | 15 | Creating a 16-shot dataset ... 16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/Beijing-Opera 22 | 23 | Number of Classes: 4 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 64 28 | 29 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo'] 30 | 31 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'} 32 | 33 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/Beijing-Opera 43 | 44 | Number of Classes: 4 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 47 49 | 50 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo'] 51 | 52 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'} 53 | 54 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: 
`resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of bangu.', 'The is a recording of daluo.', 'The is a recording of naobo.', 'The is a recording of xiaoluo.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['bangu', 'daluo', 'naobo', 'xiaoluo'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/Beijing-Opera', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='Beijing-Opera-FOLD4', freq_test_model=10, json_file_path='logs/pengi_zeroshot/Beijing-Opera-FOLD4.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/Beijing-Opera', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model ... 72 | 73 | 0%| | 0/1 [00:00 0 Hours : 0 Minutes : 1 Seconds 81 | 82 | 83 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 84 | _warn_prf(average, modifier, msg_start, len(result)) 85 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 
86 | _warn_prf(average, modifier, msg_start, len(result)) 87 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 88 | _warn_prf(average, modifier, msg_start, len(result)) 89 | Accuracy = 0.2766 90 | F1-Score = 0.1083 91 | Precision = 0.0691 92 | Recall = 0.2500 93 | Average Loss = 3.9215 94 | 95 | 96 | Saving Results ... 97 | Results Saved 98 | 99 | 100 | -------------------------------------------------------------------------------- /logs/zeroshot/Beijing-Opera-FOLD5-SEED0.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############################################## 4 | PALM: Prompt Learning in Audio Language Models 5 | ############################################## 6 | 7 | 8 | 9 | 10 | Model: PENGI_ZEROSHOT 11 | Dataset: Beijing-Opera 12 | Seed: 0 13 | 14 | 15 | Creating a 16-shot dataset ... 
16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/Beijing-Opera 22 | 23 | Number of Classes: 4 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 64 28 | 29 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo'] 30 | 31 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'} 32 | 33 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/Beijing-Opera 43 | 44 | Number of Classes: 4 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 47 49 | 50 | Classnames: ['bangu', 'daluo', 'naobo', 'xiaoluo'] 51 | 52 | Label to Classname: {0: 'bangu', 1: 'daluo', 2: 'naobo', 3: 'xiaoluo'} 53 | 54 | Classname to Label: {'bangu': 0, 'daluo': 1, 'naobo': 2, 'xiaoluo': 3} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 
59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of bangu.', 'The is a recording of daluo.', 'The is a recording of naobo.', 'The is a recording of xiaoluo.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['bangu', 'daluo', 'naobo', 'xiaoluo'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/Beijing-Opera', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='Beijing-Opera-FOLD5', freq_test_model=10, json_file_path='logs/pengi_zeroshot/Beijing-Opera-FOLD5.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/Beijing-Opera', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model ... 72 | 73 | 0%| | 0/1 [00:00 0 Hours : 0 Minutes : 1 Seconds 81 | 82 | 83 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 84 | _warn_prf(average, modifier, msg_start, len(result)) 85 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 86 | _warn_prf(average, modifier, msg_start, len(result)) 87 | REDACTED/.venvs/palm/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. 
Use `zero_division` parameter to control this behavior. 88 | _warn_prf(average, modifier, msg_start, len(result)) 89 | Accuracy = 0.3191 90 | F1-Score = 0.1890 91 | Precision = 0.3222 92 | Recall = 0.2955 93 | Average Loss = 3.6894 94 | 95 | 96 | Saving Results ... 97 | Results Saved 98 | 99 | 100 | -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD1-SEED0.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############################################## 4 | PALM: Prompt Learning in Audio Language Models 5 | ############################################## 6 | 7 | 8 | 9 | 10 | Model: PENGI_ZEROSHOT 11 | Dataset: UrbanSound8K 12 | Seed: 0 13 | 14 | 15 | Creating a 16-shot dataset ... 16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/UrbanSound8K 22 | 23 | Number of Classes: 10 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 160 28 | 29 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'] 30 | 31 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'} 32 | 33 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/UrbanSound8K 43 | 44 | Number of Classes: 10 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 873 49 | 50 | Classnames: ['air conditioner', 'car horn', 
'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'] 51 | 52 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'} 53 | 54 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of air conditioner.', 'The is a recording of car horn.', 'The is a recording of children playing.', 'The is a recording of dog bark.', 'The is a recording of drilling.', 'The is a recording of engine idling.', 'The is a recording of gun shot.', 'The is a recording of jackhammer.', 'The is a recording of siren.', 'The is a recording of street music.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/UrbanSound8K', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='UrbanSound8K-FOLD1', freq_test_model=10, json_file_path='logs/pengi_zeroshot/UrbanSound8K-FOLD1.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', 
model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/UrbanSound8K', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model ... 72 | 73 | 0%| | 0/7 [00:00 0 Hours : 0 Minutes : 8 Seconds 87 | 88 | 89 | Accuracy = 0.5487 90 | F1-Score = 0.5236 91 | Precision = 0.5844 92 | Recall = 0.5538 93 | Average Loss = 1.6837 94 | 95 | 96 | Saving Results ... 97 | Results Saved 98 | 99 | 100 | -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD10-SEED0.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############################################## 4 | PALM: Prompt Learning in Audio Language Models 5 | ############################################## 6 | 7 | 8 | 9 | 10 | Model: PENGI_ZEROSHOT 11 | Dataset: UrbanSound8K 12 | Seed: 0 13 | 14 | 15 | Creating a 16-shot dataset ... 
16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/UrbanSound8K 22 | 23 | Number of Classes: 10 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 160 28 | 29 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'] 30 | 31 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'} 32 | 33 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/UrbanSound8K 43 | 44 | Number of Classes: 10 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 837 49 | 50 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'] 51 | 52 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'} 53 | 54 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. 
Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of air conditioner.', 'The is a recording of car horn.', 'The is a recording of children playing.', 'The is a recording of dog bark.', 'The is a recording of drilling.', 'The is a recording of engine idling.', 'The is a recording of gun shot.', 'The is a recording of jackhammer.', 'The is a recording of siren.', 'The is a recording of street music.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/UrbanSound8K', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='UrbanSound8K-FOLD10', freq_test_model=10, json_file_path='logs/pengi_zeroshot/UrbanSound8K-FOLD10.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/UrbanSound8K', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model ... 72 | 73 | 0%| | 0/7 [00:00 0 Hours : 0 Minutes : 6 Seconds 87 | 88 | 89 | Accuracy = 0.5078 90 | F1-Score = 0.4733 91 | Precision = 0.5256 92 | Recall = 0.5120 93 | Average Loss = 1.7276 94 | 95 | 96 | Saving Results ... 
97 | Results Saved 98 | 99 | 100 | -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD2-SEED0.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############################################## 4 | PALM: Prompt Learning in Audio Language Models 5 | ############################################## 6 | 7 | 8 | 9 | 10 | Model: PENGI_ZEROSHOT 11 | Dataset: UrbanSound8K 12 | Seed: 0 13 | 14 | 15 | Creating a 16-shot dataset ... 16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/UrbanSound8K 22 | 23 | Number of Classes: 10 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 160 28 | 29 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'] 30 | 31 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'} 32 | 33 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/UrbanSound8K 43 | 44 | Number of Classes: 10 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 888 49 | 50 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'] 51 | 52 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 
'jackhammer', 8: 'siren', 9: 'street music'} 53 | 54 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of air conditioner.', 'The is a recording of car horn.', 'The is a recording of children playing.', 'The is a recording of dog bark.', 'The is a recording of drilling.', 'The is a recording of engine idling.', 'The is a recording of gun shot.', 'The is a recording of jackhammer.', 'The is a recording of siren.', 'The is a recording of street music.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/UrbanSound8K', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='UrbanSound8K-FOLD2', freq_test_model=10, json_file_path='logs/pengi_zeroshot/UrbanSound8K-FOLD2.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/UrbanSound8K', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model 
... 72 | 73 | 0%| | 0/7 [00:00 0 Hours : 0 Minutes : 5 Seconds 87 | 88 | 89 | Accuracy = 0.5450 90 | F1-Score = 0.5010 91 | Precision = 0.5382 92 | Recall = 0.5059 93 | Average Loss = 1.6292 94 | 95 | 96 | Saving Results ... 97 | Results Saved 98 | 99 | 100 | -------------------------------------------------------------------------------- /logs/zeroshot/UrbanSound8K-FOLD5-SEED0.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############################################## 4 | PALM: Prompt Learning in Audio Language Models 5 | ############################################## 6 | 7 | 8 | 9 | 10 | Model: PENGI_ZEROSHOT 11 | Dataset: UrbanSound8K 12 | Seed: 0 13 | 14 | 15 | Creating a 16-shot dataset ... 16 | 17 | 18 | ################## Dataset Information ################## 19 | FewShot Dataset 20 | 21 | Root: REDACTED/Audio-Datasets/UrbanSound8K 22 | 23 | Number of Classes: 10 24 | 25 | Number of Shots: 16 26 | 27 | Total Number of Samples: 160 28 | 29 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'] 30 | 31 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'} 32 | 33 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9} 34 | ######################################################## 35 | 36 | 37 | 38 | 39 | ################## Dataset Information ################## 40 | FewShot Dataset 41 | 42 | Root: REDACTED/Audio-Datasets/UrbanSound8K 43 | 44 | Number of Classes: 10 45 | 46 | Number of Shots: -1 47 | 48 | Total Number of Samples: 936 49 | 50 | Classnames: ['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 
'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'] 51 | 52 | Label to Classname: {0: 'air conditioner', 1: 'car horn', 2: 'children playing', 3: 'dog bark', 4: 'drilling', 5: 'engine idling', 6: 'gun shot', 7: 'jackhammer', 8: 'siren', 9: 'street music'} 53 | 54 | Classname to Label: {'air conditioner': 0, 'car horn': 1, 'children playing': 2, 'dog bark': 3, 'drilling': 4, 'engine idling': 5, 'gun shot': 6, 'jackhammer': 7, 'siren': 8, 'street music': 9} 55 | ######################################################## 56 | 57 | 58 | REDACTED/.venvs/palm/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 59 | warnings.warn( 60 | 61 | 62 | ################## Zero-Shot PENGI Information ################## 63 | Prompt Prefix: The is a recording of 64 | Prompts: ['The is a recording of air conditioner.', 'The is a recording of car horn.', 'The is a recording of children playing.', 'The is a recording of dog bark.', 'The is a recording of drilling.', 'The is a recording of engine idling.', 'The is a recording of gun shot.', 'The is a recording of jackhammer.', 'The is a recording of siren.', 'The is a recording of street music.'] 65 | ################################################################### 66 | 67 | 68 | args: Namespace(batch_size=16, classnames=['air conditioner', 'car horn', 'children playing', 'dog bark', 'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren', 'street music'], ctx_dim=1024, dataset_root='REDACTED/Audio-Datasets/UrbanSound8K', device=device(type='cuda'), do_logging=True, eval_only=True, exp_name='UrbanSound8K-FOLD5', freq_test_model=10, json_file_path='logs/pengi_zeroshot/UrbanSound8K-FOLD5.json', log_dir='logs/pengi_zeroshot', lr=0.05, model_name='pengi_zeroshot', model_path='', n_ctx=16, n_epochs=50, num_shots=16, 
process_audio_fn=>, prompt_prefix='The is a recording of ', repeat=False, resample=True, rho=0.3, save_model_path='REDACTED/Audio-Datasets/UrbanSound8K', seed=0, spec_aug=False, start_epoch=0) 69 | 70 | 71 | Evaluating the model ... 72 | 73 | 0%| | 0/8 [00:00 0 Hours : 0 Minutes : 5 Seconds 87 | 88 | 89 | Accuracy = 0.6047 90 | F1-Score = 0.5699 91 | Precision = 0.6039 92 | Recall = 0.6031 93 | Average Loss = 1.4961 94 | 95 | 96 | Saving Results ... 97 | Results Saved 98 | 99 | 100 | --------------------------------------------------------------------------------