├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── app.py ├── audioldm ├── __init__.py ├── __main__.py ├── audio │ ├── __init__.py │ ├── audio_processing.py │ ├── stft.py │ └── tools.py ├── clap │ ├── __init__.py │ ├── encoders.py │ ├── open_clip │ │ ├── __init__.py │ │ ├── bert.py │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ ├── factory.py │ │ ├── feature_fusion.py │ │ ├── htsat.py │ │ ├── linear_probe.py │ │ ├── loss.py │ │ ├── model.py │ │ ├── model_configs │ │ │ ├── HTSAT-base.json │ │ │ ├── HTSAT-large.json │ │ │ ├── HTSAT-tiny-win-1536.json │ │ │ ├── HTSAT-tiny.json │ │ │ ├── PANN-10.json │ │ │ ├── PANN-14-fmax-18k.json │ │ │ ├── PANN-14-fmax-8k-20s.json │ │ │ ├── PANN-14-tiny-transformer.json │ │ │ ├── PANN-14-win-1536.json │ │ │ ├── PANN-14.json │ │ │ ├── PANN-6.json │ │ │ ├── RN101-quickgelu.json │ │ │ ├── RN101.json │ │ │ ├── RN50-quickgelu.json │ │ │ ├── RN50.json │ │ │ ├── RN50x16.json │ │ │ ├── RN50x4.json │ │ │ ├── ViT-B-16.json │ │ │ ├── ViT-B-32-quickgelu.json │ │ │ ├── ViT-B-32.json │ │ │ └── ViT-L-14.json │ │ ├── openai.py │ │ ├── pann_model.py │ │ ├── pretrained.py │ │ ├── timm_model.py │ │ ├── tokenizer.py │ │ ├── transform.py │ │ ├── utils.py │ │ └── version.py │ └── training │ │ ├── __init__.py │ │ ├── audioset_textmap.npy │ │ ├── data.py │ │ ├── distributed.py │ │ ├── imagenet_zeroshot_data.py │ │ ├── infer_demo.py │ │ ├── logger.py │ │ ├── lp_main.py │ │ ├── lp_train.py │ │ ├── main.py │ │ ├── params.py │ │ ├── scheduler.py │ │ ├── train.py │ │ └── zero_shot.py ├── hifigan │ ├── __init__.py │ ├── models.py │ └── utilities.py ├── latent_diffusion │ ├── __init__.py │ ├── attention.py │ ├── ddim.py │ ├── ddpm.py │ ├── ema.py │ ├── openaimodel.py │ └── util.py ├── ldm.py ├── pipeline.py ├── utils.py └── variational_autoencoder │ ├── __init__.py │ ├── autoencoder.py │ ├── distributions.py │ └── modules.py ├── bg.png ├── bin ├── audioldm └── audioldm.cmd ├── ckpt └── .gitkeep ├── scripts ├── test.sh └── text2sound.py ├── setup.py └── trumpet.wav /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/README.md -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/app.py -------------------------------------------------------------------------------- /audioldm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/__init__.py -------------------------------------------------------------------------------- /audioldm/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/__main__.py -------------------------------------------------------------------------------- /audioldm/audio/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/audio/__init__.py -------------------------------------------------------------------------------- /audioldm/audio/audio_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/audio/audio_processing.py -------------------------------------------------------------------------------- /audioldm/audio/stft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/audio/stft.py -------------------------------------------------------------------------------- /audioldm/audio/tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/audio/tools.py -------------------------------------------------------------------------------- /audioldm/clap/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audioldm/clap/encoders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/encoders.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/__init__.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/bert.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /audioldm/clap/open_clip/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/factory.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/feature_fusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/feature_fusion.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/htsat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/htsat.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/linear_probe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/linear_probe.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/loss.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/HTSAT-base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/HTSAT-base.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/HTSAT-large.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/HTSAT-large.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/HTSAT-tiny-win-1536.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/HTSAT-tiny-win-1536.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/HTSAT-tiny.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/HTSAT-tiny.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/PANN-10.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/PANN-10.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/PANN-14-fmax-18k.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/PANN-14-fmax-18k.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/PANN-14-fmax-8k-20s.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/PANN-14-fmax-8k-20s.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/PANN-14-tiny-transformer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/PANN-14-tiny-transformer.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/PANN-14-win-1536.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/PANN-14-win-1536.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/PANN-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/PANN-14.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/PANN-6.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/PANN-6.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/RN101-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/RN101-quickgelu.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/RN101.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/RN101.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/RN50-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/RN50-quickgelu.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/RN50.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/RN50.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/RN50x16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/RN50x16.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/RN50x4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/RN50x4.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/ViT-B-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/ViT-B-16.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/ViT-B-32-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/ViT-B-32-quickgelu.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/ViT-B-32.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/model_configs/ViT-L-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/model_configs/ViT-L-14.json -------------------------------------------------------------------------------- /audioldm/clap/open_clip/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/openai.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/pann_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/pann_model.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/pretrained.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/timm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/timm_model.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/tokenizer.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/transform.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/open_clip/utils.py -------------------------------------------------------------------------------- /audioldm/clap/open_clip/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.1" 2 | -------------------------------------------------------------------------------- /audioldm/clap/training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audioldm/clap/training/audioset_textmap.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/audioset_textmap.npy -------------------------------------------------------------------------------- /audioldm/clap/training/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/data.py -------------------------------------------------------------------------------- /audioldm/clap/training/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/distributed.py -------------------------------------------------------------------------------- /audioldm/clap/training/imagenet_zeroshot_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/imagenet_zeroshot_data.py -------------------------------------------------------------------------------- /audioldm/clap/training/infer_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/infer_demo.py -------------------------------------------------------------------------------- /audioldm/clap/training/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/logger.py -------------------------------------------------------------------------------- /audioldm/clap/training/lp_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/lp_main.py -------------------------------------------------------------------------------- /audioldm/clap/training/lp_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/lp_train.py -------------------------------------------------------------------------------- /audioldm/clap/training/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/main.py -------------------------------------------------------------------------------- /audioldm/clap/training/params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/params.py -------------------------------------------------------------------------------- /audioldm/clap/training/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/scheduler.py -------------------------------------------------------------------------------- /audioldm/clap/training/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/train.py -------------------------------------------------------------------------------- /audioldm/clap/training/zero_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/clap/training/zero_shot.py -------------------------------------------------------------------------------- /audioldm/hifigan/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/hifigan/__init__.py -------------------------------------------------------------------------------- /audioldm/hifigan/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/hifigan/models.py -------------------------------------------------------------------------------- /audioldm/hifigan/utilities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/hifigan/utilities.py -------------------------------------------------------------------------------- /audioldm/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audioldm/latent_diffusion/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/latent_diffusion/attention.py -------------------------------------------------------------------------------- /audioldm/latent_diffusion/ddim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/latent_diffusion/ddim.py -------------------------------------------------------------------------------- /audioldm/latent_diffusion/ddpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/latent_diffusion/ddpm.py -------------------------------------------------------------------------------- /audioldm/latent_diffusion/ema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/latent_diffusion/ema.py -------------------------------------------------------------------------------- /audioldm/latent_diffusion/openaimodel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/latent_diffusion/openaimodel.py -------------------------------------------------------------------------------- /audioldm/latent_diffusion/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/latent_diffusion/util.py -------------------------------------------------------------------------------- /audioldm/ldm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/ldm.py -------------------------------------------------------------------------------- /audioldm/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/pipeline.py -------------------------------------------------------------------------------- /audioldm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/utils.py -------------------------------------------------------------------------------- /audioldm/variational_autoencoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audioldm/variational_autoencoder/autoencoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/variational_autoencoder/autoencoder.py -------------------------------------------------------------------------------- /audioldm/variational_autoencoder/distributions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/variational_autoencoder/distributions.py -------------------------------------------------------------------------------- /audioldm/variational_autoencoder/modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/audioldm/variational_autoencoder/modules.py -------------------------------------------------------------------------------- /bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/bg.png -------------------------------------------------------------------------------- /bin/audioldm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/bin/audioldm -------------------------------------------------------------------------------- /bin/audioldm.cmd: -------------------------------------------------------------------------------- 1 | @echo OFF 2 | python -m audioldm %* -------------------------------------------------------------------------------- /ckpt/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/scripts/test.sh -------------------------------------------------------------------------------- /scripts/text2sound.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/scripts/text2sound.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/setup.py -------------------------------------------------------------------------------- /trumpet.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haoheliu/AudioLDM/HEAD/trumpet.wav --------------------------------------------------------------------------------