├── .gitignore ├── README.md ├── audio_captioning ├── README.md ├── clip │ ├── AudioCLIP │ │ ├── .gitattributes │ │ ├── assets │ │ │ └── bpe_simple_vocab_16e6.txt.gz │ │ ├── clip.py │ │ ├── ignite_trainer │ │ │ ├── __init__.py │ │ │ ├── _interfaces.py │ │ │ ├── _trainer.py │ │ │ ├── _utils.py │ │ │ ├── _visdom.py │ │ │ └── version.py │ │ ├── main.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── audioclip.py │ │ │ ├── clip │ │ │ │ ├── __init__.py │ │ │ │ ├── clip.py │ │ │ │ └── model.py │ │ │ └── esresnet │ │ │ │ ├── __init__.py │ │ │ │ ├── attention.py │ │ │ │ ├── base.py │ │ │ │ └── fbsp.py │ │ ├── requirements.txt │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── esc50.py │ │ │ └── us8k.py │ │ │ ├── simple_tokenizer.py │ │ │ └── transforms.py │ ├── WavCaps │ │ ├── README.md │ │ ├── captioning │ │ │ ├── README.md │ │ │ ├── data │ │ │ │ ├── AudioCaps │ │ │ │ │ └── csv_files │ │ │ │ │ │ ├── test.csv │ │ │ │ │ │ ├── train.csv │ │ │ │ │ │ └── val.csv │ │ │ │ └── Clotho │ │ │ │ │ └── csv_files │ │ │ │ │ ├── test.csv │ │ │ │ │ ├── train.csv │ │ │ │ │ └── val.csv │ │ │ ├── data_handling │ │ │ │ ├── caption_dataset.py │ │ │ │ ├── datamodule.py │ │ │ │ ├── pretrain_dataset.py │ │ │ │ ├── sampler.py │ │ │ │ └── text_transform.py │ │ │ ├── eval_metrics.py │ │ │ ├── models │ │ │ │ ├── audio_encoder.py │ │ │ │ ├── audio_encoder_config.py │ │ │ │ ├── bart_captioning.py │ │ │ │ ├── bert_captioning.py │ │ │ │ ├── cnns.py │ │ │ │ ├── feature_extractor.py │ │ │ │ └── htsat.py │ │ │ ├── pretrain.py │ │ │ ├── settings │ │ │ │ └── pretrain.yaml │ │ │ ├── tools │ │ │ │ ├── config_loader.py │ │ │ │ ├── dataset.py │ │ │ │ ├── file_io.py │ │ │ │ ├── loss.py │ │ │ │ ├── optim_utils.py │ │ │ │ └── utils.py │ │ │ └── train.py │ │ ├── data │ │ │ └── json_files │ │ │ │ └── FreeSound │ │ │ │ └── .gitattributes │ │ └── retrieval │ │ │ ├── README.md │ │ │ ├── data │ │ │ ├── AudioCaps │ │ │ │ └── csv_files │ │ │ │ │ ├── test.csv │ │ │ │ │ ├── train.csv │ │ │ │ │ └── val.csv │ │ │ └── Clotho │ │ │ │ └── csv_files │ │ │ │ ├── test.csv │ │ │ │ ├── train.csv │ │ │ │ └── val.csv │ │ │ ├── data_handling │ │ │ ├── caption_dataset.py │ │ │ ├── datamodule.py │ │ │ ├── pretrain_dataset.py │ │ │ ├── sampler.py │ │ │ └── text_transform.py │ │ │ ├── models │ │ │ ├── ase_model.py │ │ │ ├── audio_encoder.py │ │ │ ├── cnns.py │ │ │ ├── feature_extractor.py │ │ │ ├── htsat.py │ │ │ └── text_encoder.py │ │ │ ├── pretrain.py │ │ │ ├── settings │ │ │ ├── inference.yaml │ │ │ ├── pretrain.yaml │ │ │ ├── pretrain_all.yaml │ │ │ └── train.yaml │ │ │ ├── tools │ │ │ ├── losses.py │ │ │ ├── optim_utils.py │ │ │ └── utils.py │ │ │ ├── train.py │ │ │ └── zero_shot_classification.py │ ├── audio_preprocessors.py │ └── load_clip_model.py ├── data │ ├── AudioCaps │ │ ├── AudioCaps_code_exp.json │ │ ├── AudioCaps_test.json │ │ ├── AudioCaps_train.json │ │ └── AudioCaps_val.json │ ├── AudioSet │ │ └── class_labels_indices.csv │ ├── Clotho │ │ ├── clotho_v2.1_test.json │ │ ├── clotho_v2.1_train.json │ │ └── clotho_v2.1_val.json │ ├── README.md │ ├── prep_data.md │ ├── process_AudioCaps.py │ ├── process_clotho_v2.1.py │ ├── sounding_objects │ │ └── chatgpt_audio_tags.csv │ └── summary_stats_data.py ├── evaluation │ ├── README.md │ ├── cocoeval.py │ ├── development_result_jsons │ │ ├── create_model_ablation_plot.ipynb │ │ └── create_val_plot.ipynb │ ├── get_stanford_models.sh │ ├── join_test_results.py │ ├── plots │ │ ├── AudioCaps_beta_ablation_plot.png │ │ ├── AudioCaps_l_ablation_plot.png │ │ ├── AudioCaps_model_ablation.png │ │ └── AudioCaps_validation_sweep.png │ ├── pycocotools │ │ ├── __init__.py │ │ └── coco.py │ └── sweep_ablation_table.py ├── inference_magic.py ├── language_model │ ├── loss_func.py │ ├── simctg.py │ └── utlis.py ├── plot_metrics.py ├── sh_folder │ ├── AudioCLIP_AudioSet+ChatGPT_KW.sh │ ├── AudioCLIP_AudioSet_KW.sh │ ├── LAION_AudioSet+ChatGPT_KW.sh │ ├── LAION_AudioSet_KW.sh │ ├── MAGIC_AudioCLIP.sh │ ├── MAGIC_AudioCLIP_AudioSet+ChatGPT_KW.sh │ ├── MAGIC_AudioCLIP_AudioSet_KW.sh │ ├── MAGIC_LAION.sh │ ├── MAGIC_LAION_AudioSet+ChatGPT_KW.sh │ ├── MAGIC_LAION_AudioSet_KW.sh │ ├── MAGIC_WavCaps.sh │ ├── MAGIC_WavCaps_AudioSet+ChatGPT_KW.sh │ ├── MAGIC_WavCaps_AudioSet_KW.sh │ ├── MAGIC_WavCaps_AudioSet_KW_beta_sweep_AC.sh │ ├── MAGIC_WavCaps_AudioSet_KW_beta_sweep_Cl.sh │ ├── MAGIC_WavCaps_AudioSet_KW_hyperparam_sweep_AC.sh │ ├── MAGIC_WavCaps_AudioSet_KW_l_sweep_AC.sh │ ├── MAGIC_WavCaps_AudioSet_KW_l_sweep_Cl.sh │ ├── MAGIC_code_test.sh │ ├── WavCaps_AudioSet+ChatGPT_KW.sh │ ├── WavCaps_AudioSet_KW.sh │ ├── baseline.sh │ ├── create_SOTA_table.sh │ ├── create_beta_ablation_table_AC.sh │ ├── create_beta_ablation_table_Cl.sh │ ├── create_l_ablation_table_AC.sh │ └── create_l_ablation_table_Cl.sh └── sound_obj_generator.py ├── docs └── images │ └── Zero-Shot-Audio-Captioning-Model-Figure-10.png └── environment.yaml /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/README.md -------------------------------------------------------------------------------- /audio_captioning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/README.md -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/.gitattributes -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/assets/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/assets/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/clip.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/ignite_trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/ignite_trainer/__init__.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/ignite_trainer/_interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/ignite_trainer/_interfaces.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/ignite_trainer/_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/ignite_trainer/_trainer.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/ignite_trainer/_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/ignite_trainer/_utils.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/ignite_trainer/_visdom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/ignite_trainer/_visdom.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/ignite_trainer/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.5b5" 2 | -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/main.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/model/__init__.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/model/audioclip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/model/audioclip.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/model/clip/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/model/clip/__init__.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/model/clip/clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/model/clip/clip.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/model/clip/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/model/clip/model.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/model/esresnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/model/esresnet/__init__.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/model/esresnet/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/model/esresnet/attention.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/model/esresnet/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/model/esresnet/base.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/model/esresnet/fbsp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/model/esresnet/fbsp.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/requirements.txt -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/utils/__init__.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/utils/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/utils/datasets/__init__.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/utils/datasets/esc50.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/utils/datasets/esc50.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/utils/datasets/us8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/utils/datasets/us8k.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/utils/simple_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/utils/simple_tokenizer.py -------------------------------------------------------------------------------- /audio_captioning/clip/AudioCLIP/utils/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/AudioCLIP/utils/transforms.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/README.md -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/README.md -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/data/AudioCaps/csv_files/test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/data/AudioCaps/csv_files/test.csv -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/data/AudioCaps/csv_files/train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/data/AudioCaps/csv_files/train.csv -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/data/AudioCaps/csv_files/val.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/data/AudioCaps/csv_files/val.csv -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/data/Clotho/csv_files/test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/data/Clotho/csv_files/test.csv -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/data/Clotho/csv_files/train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/data/Clotho/csv_files/train.csv -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/data/Clotho/csv_files/val.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/data/Clotho/csv_files/val.csv -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/data_handling/caption_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/data_handling/caption_dataset.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/data_handling/datamodule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/data_handling/datamodule.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/data_handling/pretrain_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/data_handling/pretrain_dataset.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/data_handling/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/data_handling/sampler.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/data_handling/text_transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/data_handling/text_transform.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/eval_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/eval_metrics.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/models/audio_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/models/audio_encoder.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/models/audio_encoder_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/models/audio_encoder_config.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/models/bart_captioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/models/bart_captioning.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/models/bert_captioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/models/bert_captioning.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/models/cnns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/models/cnns.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/models/feature_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/models/feature_extractor.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/models/htsat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/models/htsat.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/pretrain.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/settings/pretrain.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/settings/pretrain.yaml -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/tools/config_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/tools/config_loader.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/tools/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/tools/dataset.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/tools/file_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/tools/file_io.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/tools/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/tools/loss.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/tools/optim_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/tools/optim_utils.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/tools/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/tools/utils.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/captioning/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/captioning/train.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/data/json_files/FreeSound/.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/data/json_files/FreeSound/.gitattributes -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/README.md -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/data/AudioCaps/csv_files/test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/data/AudioCaps/csv_files/test.csv -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/data/AudioCaps/csv_files/train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/data/AudioCaps/csv_files/train.csv -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/data/AudioCaps/csv_files/val.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/data/AudioCaps/csv_files/val.csv -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/data/Clotho/csv_files/test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/data/Clotho/csv_files/test.csv -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/data/Clotho/csv_files/train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/data/Clotho/csv_files/train.csv -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/data/Clotho/csv_files/val.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/data/Clotho/csv_files/val.csv -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/data_handling/caption_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/data_handling/caption_dataset.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/data_handling/datamodule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/data_handling/datamodule.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/data_handling/pretrain_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/data_handling/pretrain_dataset.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/data_handling/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/data_handling/sampler.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/data_handling/text_transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/data_handling/text_transform.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/models/ase_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/models/ase_model.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/models/audio_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/models/audio_encoder.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/models/cnns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/models/cnns.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/models/feature_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/models/feature_extractor.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/models/htsat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/models/htsat.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/models/text_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/models/text_encoder.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/pretrain.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/settings/inference.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/settings/inference.yaml -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/settings/pretrain.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/settings/pretrain.yaml -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/settings/pretrain_all.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/settings/pretrain_all.yaml -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/settings/train.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/settings/train.yaml -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/tools/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/tools/losses.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/tools/optim_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/tools/optim_utils.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/tools/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/tools/utils.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/train.py -------------------------------------------------------------------------------- /audio_captioning/clip/WavCaps/retrieval/zero_shot_classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/WavCaps/retrieval/zero_shot_classification.py -------------------------------------------------------------------------------- /audio_captioning/clip/audio_preprocessors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/audio_preprocessors.py -------------------------------------------------------------------------------- /audio_captioning/clip/load_clip_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/clip/load_clip_model.py -------------------------------------------------------------------------------- /audio_captioning/data/AudioCaps/AudioCaps_code_exp.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/AudioCaps/AudioCaps_code_exp.json -------------------------------------------------------------------------------- /audio_captioning/data/AudioCaps/AudioCaps_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/AudioCaps/AudioCaps_test.json -------------------------------------------------------------------------------- /audio_captioning/data/AudioCaps/AudioCaps_train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/AudioCaps/AudioCaps_train.json -------------------------------------------------------------------------------- /audio_captioning/data/AudioCaps/AudioCaps_val.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/AudioCaps/AudioCaps_val.json -------------------------------------------------------------------------------- /audio_captioning/data/AudioSet/class_labels_indices.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/AudioSet/class_labels_indices.csv -------------------------------------------------------------------------------- /audio_captioning/data/Clotho/clotho_v2.1_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/Clotho/clotho_v2.1_test.json -------------------------------------------------------------------------------- /audio_captioning/data/Clotho/clotho_v2.1_train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/Clotho/clotho_v2.1_train.json -------------------------------------------------------------------------------- /audio_captioning/data/Clotho/clotho_v2.1_val.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/Clotho/clotho_v2.1_val.json -------------------------------------------------------------------------------- /audio_captioning/data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/README.md -------------------------------------------------------------------------------- /audio_captioning/data/prep_data.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/prep_data.md -------------------------------------------------------------------------------- /audio_captioning/data/process_AudioCaps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/process_AudioCaps.py -------------------------------------------------------------------------------- /audio_captioning/data/process_clotho_v2.1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/process_clotho_v2.1.py -------------------------------------------------------------------------------- /audio_captioning/data/sounding_objects/chatgpt_audio_tags.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/sounding_objects/chatgpt_audio_tags.csv -------------------------------------------------------------------------------- /audio_captioning/data/summary_stats_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/data/summary_stats_data.py -------------------------------------------------------------------------------- /audio_captioning/evaluation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/evaluation/README.md -------------------------------------------------------------------------------- /audio_captioning/evaluation/cocoeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/evaluation/cocoeval.py -------------------------------------------------------------------------------- /audio_captioning/evaluation/development_result_jsons/create_model_ablation_plot.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/evaluation/development_result_jsons/create_model_ablation_plot.ipynb -------------------------------------------------------------------------------- /audio_captioning/evaluation/development_result_jsons/create_val_plot.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/evaluation/development_result_jsons/create_val_plot.ipynb -------------------------------------------------------------------------------- /audio_captioning/evaluation/get_stanford_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/evaluation/get_stanford_models.sh -------------------------------------------------------------------------------- /audio_captioning/evaluation/join_test_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/evaluation/join_test_results.py -------------------------------------------------------------------------------- /audio_captioning/evaluation/plots/AudioCaps_beta_ablation_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/evaluation/plots/AudioCaps_beta_ablation_plot.png -------------------------------------------------------------------------------- /audio_captioning/evaluation/plots/AudioCaps_l_ablation_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/evaluation/plots/AudioCaps_l_ablation_plot.png -------------------------------------------------------------------------------- /audio_captioning/evaluation/plots/AudioCaps_model_ablation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/evaluation/plots/AudioCaps_model_ablation.png -------------------------------------------------------------------------------- /audio_captioning/evaluation/plots/AudioCaps_validation_sweep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/evaluation/plots/AudioCaps_validation_sweep.png -------------------------------------------------------------------------------- /audio_captioning/evaluation/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "tylin" 2 | -------------------------------------------------------------------------------- /audio_captioning/evaluation/pycocotools/coco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/evaluation/pycocotools/coco.py -------------------------------------------------------------------------------- /audio_captioning/evaluation/sweep_ablation_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/evaluation/sweep_ablation_table.py -------------------------------------------------------------------------------- /audio_captioning/inference_magic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/inference_magic.py -------------------------------------------------------------------------------- /audio_captioning/language_model/loss_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/language_model/loss_func.py -------------------------------------------------------------------------------- /audio_captioning/language_model/simctg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/language_model/simctg.py -------------------------------------------------------------------------------- /audio_captioning/language_model/utlis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/language_model/utlis.py -------------------------------------------------------------------------------- /audio_captioning/plot_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/plot_metrics.py -------------------------------------------------------------------------------- /audio_captioning/sh_folder/AudioCLIP_AudioSet+ChatGPT_KW.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/AudioCLIP_AudioSet+ChatGPT_KW.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/AudioCLIP_AudioSet_KW.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/AudioCLIP_AudioSet_KW.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/LAION_AudioSet+ChatGPT_KW.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/LAION_AudioSet+ChatGPT_KW.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/LAION_AudioSet_KW.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/LAION_AudioSet_KW.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_AudioCLIP.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_AudioCLIP.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_AudioCLIP_AudioSet+ChatGPT_KW.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_AudioCLIP_AudioSet+ChatGPT_KW.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_AudioCLIP_AudioSet_KW.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_AudioCLIP_AudioSet_KW.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_LAION.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_LAION.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_LAION_AudioSet+ChatGPT_KW.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_LAION_AudioSet+ChatGPT_KW.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_LAION_AudioSet_KW.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_LAION_AudioSet_KW.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_WavCaps.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_WavCaps.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet+ChatGPT_KW.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet+ChatGPT_KW.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet_KW.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet_KW.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet_KW_beta_sweep_AC.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet_KW_beta_sweep_AC.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet_KW_beta_sweep_Cl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet_KW_beta_sweep_Cl.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet_KW_hyperparam_sweep_AC.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet_KW_hyperparam_sweep_AC.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet_KW_l_sweep_AC.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet_KW_l_sweep_AC.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet_KW_l_sweep_Cl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_WavCaps_AudioSet_KW_l_sweep_Cl.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/MAGIC_code_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/MAGIC_code_test.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/WavCaps_AudioSet+ChatGPT_KW.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/WavCaps_AudioSet+ChatGPT_KW.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/WavCaps_AudioSet_KW.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/WavCaps_AudioSet_KW.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/baseline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/baseline.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/create_SOTA_table.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/create_SOTA_table.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/create_beta_ablation_table_AC.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/create_beta_ablation_table_AC.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/create_beta_ablation_table_Cl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/create_beta_ablation_table_Cl.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/create_l_ablation_table_AC.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/create_l_ablation_table_AC.sh -------------------------------------------------------------------------------- /audio_captioning/sh_folder/create_l_ablation_table_Cl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sh_folder/create_l_ablation_table_Cl.sh -------------------------------------------------------------------------------- /audio_captioning/sound_obj_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/audio_captioning/sound_obj_generator.py -------------------------------------------------------------------------------- /docs/images/Zero-Shot-Audio-Captioning-Model-Figure-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/docs/images/Zero-Shot-Audio-Captioning-Model-Figure-10.png -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ZerAuCap/HEAD/environment.yaml --------------------------------------------------------------------------------