├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── configs ├── text_to_audio │ ├── bigvgan_args.yaml │ ├── clap_args.yaml │ └── txt2audio_args.yaml └── train │ ├── diffusion.yaml │ └── vae.yaml ├── data └── audiocaps_test.tsv ├── gen_wav.py ├── gen_wavs_by_tsv.py ├── ldm ├── data │ ├── audiocaps_fn2cap.json │ └── joinaudiodataset_624.py ├── lr_scheduler.py ├── models │ ├── autoencoder.py │ ├── autoencoder_multi.py │ └── diffusion │ │ ├── __init__.py │ │ ├── classifier.py │ │ ├── ddim.py │ │ ├── ddpm.py │ │ ├── ddpm_audio.py │ │ ├── ddpm_audio_inpaint.py │ │ └── plms.py ├── modules │ ├── attention.py │ ├── diffusionmodules │ │ ├── __init__.py │ │ ├── custom_openaimodel.py │ │ ├── model.py │ │ ├── openaimodel.py │ │ └── util.py │ ├── discriminator │ │ ├── model.py │ │ └── multi_window_disc.py │ ├── distributions │ │ ├── __init__.py │ │ └── distributions.py │ ├── ema.py │ ├── encoders │ │ ├── CLAP │ │ │ ├── CLAPWrapper.py │ │ │ ├── __init__.py │ │ │ ├── audio.py │ │ │ ├── clap.py │ │ │ ├── config.yml │ │ │ └── utils.py │ │ ├── __init__.py │ │ └── modules.py │ ├── losses_audio │ │ ├── __init__.py │ │ ├── contperceptual.py │ │ └── vqperceptual.py │ └── x_transformer.py └── util.py ├── main.py ├── preprocess ├── NAT_mel.py ├── __init__.py └── mel_spec.py ├── requirements.txt ├── scripts ├── audio2audio.py └── test.py ├── useful_ckpts ├── .DS_Store └── CLAP │ └── config.yml ├── vocoder └── bigvgan │ ├── __init__.py │ ├── activations.py │ ├── alias_free_torch │ ├── __init__.py │ ├── act.py │ ├── filter.py │ └── resample.py │ └── models.py └── wav_evaluation ├── cal_clap_score.py └── models ├── CLAPWrapper.py ├── __init__.py ├── audio.py ├── clap.py └── utils.py /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/README.md -------------------------------------------------------------------------------- /configs/text_to_audio/bigvgan_args.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/configs/text_to_audio/bigvgan_args.yaml -------------------------------------------------------------------------------- /configs/text_to_audio/clap_args.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/configs/text_to_audio/clap_args.yaml -------------------------------------------------------------------------------- /configs/text_to_audio/txt2audio_args.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/configs/text_to_audio/txt2audio_args.yaml -------------------------------------------------------------------------------- /configs/train/diffusion.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/configs/train/diffusion.yaml -------------------------------------------------------------------------------- /configs/train/vae.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/configs/train/vae.yaml -------------------------------------------------------------------------------- /data/audiocaps_test.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/data/audiocaps_test.tsv -------------------------------------------------------------------------------- /gen_wav.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/gen_wav.py -------------------------------------------------------------------------------- /gen_wavs_by_tsv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/gen_wavs_by_tsv.py -------------------------------------------------------------------------------- /ldm/data/audiocaps_fn2cap.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/data/audiocaps_fn2cap.json -------------------------------------------------------------------------------- /ldm/data/joinaudiodataset_624.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/data/joinaudiodataset_624.py -------------------------------------------------------------------------------- /ldm/lr_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/lr_scheduler.py -------------------------------------------------------------------------------- /ldm/models/autoencoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/models/autoencoder.py -------------------------------------------------------------------------------- /ldm/models/autoencoder_multi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/models/autoencoder_multi.py -------------------------------------------------------------------------------- /ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ldm/models/diffusion/classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/models/diffusion/classifier.py -------------------------------------------------------------------------------- /ldm/models/diffusion/ddim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/models/diffusion/ddim.py -------------------------------------------------------------------------------- /ldm/models/diffusion/ddpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/models/diffusion/ddpm.py -------------------------------------------------------------------------------- /ldm/models/diffusion/ddpm_audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/models/diffusion/ddpm_audio.py -------------------------------------------------------------------------------- /ldm/models/diffusion/ddpm_audio_inpaint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/models/diffusion/ddpm_audio_inpaint.py -------------------------------------------------------------------------------- /ldm/models/diffusion/plms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/models/diffusion/plms.py -------------------------------------------------------------------------------- /ldm/modules/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/attention.py -------------------------------------------------------------------------------- /ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ldm/modules/diffusionmodules/custom_openaimodel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/diffusionmodules/custom_openaimodel.py -------------------------------------------------------------------------------- /ldm/modules/diffusionmodules/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/diffusionmodules/model.py -------------------------------------------------------------------------------- /ldm/modules/diffusionmodules/openaimodel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/diffusionmodules/openaimodel.py -------------------------------------------------------------------------------- /ldm/modules/diffusionmodules/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/diffusionmodules/util.py -------------------------------------------------------------------------------- /ldm/modules/discriminator/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/discriminator/model.py -------------------------------------------------------------------------------- /ldm/modules/discriminator/multi_window_disc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/discriminator/multi_window_disc.py -------------------------------------------------------------------------------- /ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/distributions/distributions.py -------------------------------------------------------------------------------- /ldm/modules/ema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/ema.py -------------------------------------------------------------------------------- /ldm/modules/encoders/CLAP/CLAPWrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/encoders/CLAP/CLAPWrapper.py -------------------------------------------------------------------------------- /ldm/modules/encoders/CLAP/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/encoders/CLAP/__init__.py -------------------------------------------------------------------------------- /ldm/modules/encoders/CLAP/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/encoders/CLAP/audio.py -------------------------------------------------------------------------------- /ldm/modules/encoders/CLAP/clap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/encoders/CLAP/clap.py -------------------------------------------------------------------------------- /ldm/modules/encoders/CLAP/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/encoders/CLAP/config.yml -------------------------------------------------------------------------------- /ldm/modules/encoders/CLAP/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/encoders/CLAP/utils.py -------------------------------------------------------------------------------- /ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ldm/modules/encoders/modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/encoders/modules.py -------------------------------------------------------------------------------- /ldm/modules/losses_audio/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/losses_audio/__init__.py -------------------------------------------------------------------------------- /ldm/modules/losses_audio/contperceptual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/losses_audio/contperceptual.py -------------------------------------------------------------------------------- /ldm/modules/losses_audio/vqperceptual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/losses_audio/vqperceptual.py -------------------------------------------------------------------------------- /ldm/modules/x_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/modules/x_transformer.py -------------------------------------------------------------------------------- /ldm/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/ldm/util.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/main.py -------------------------------------------------------------------------------- /preprocess/NAT_mel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/preprocess/NAT_mel.py -------------------------------------------------------------------------------- /preprocess/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /preprocess/mel_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/preprocess/mel_spec.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/audio2audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/scripts/audio2audio.py -------------------------------------------------------------------------------- /scripts/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/scripts/test.py -------------------------------------------------------------------------------- /useful_ckpts/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/useful_ckpts/.DS_Store -------------------------------------------------------------------------------- /useful_ckpts/CLAP/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/useful_ckpts/CLAP/config.yml -------------------------------------------------------------------------------- /vocoder/bigvgan/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vocoder/bigvgan/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/vocoder/bigvgan/activations.py -------------------------------------------------------------------------------- /vocoder/bigvgan/alias_free_torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/vocoder/bigvgan/alias_free_torch/__init__.py -------------------------------------------------------------------------------- /vocoder/bigvgan/alias_free_torch/act.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/vocoder/bigvgan/alias_free_torch/act.py -------------------------------------------------------------------------------- /vocoder/bigvgan/alias_free_torch/filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/vocoder/bigvgan/alias_free_torch/filter.py -------------------------------------------------------------------------------- /vocoder/bigvgan/alias_free_torch/resample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/vocoder/bigvgan/alias_free_torch/resample.py -------------------------------------------------------------------------------- /vocoder/bigvgan/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/vocoder/bigvgan/models.py -------------------------------------------------------------------------------- /wav_evaluation/cal_clap_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/wav_evaluation/cal_clap_score.py -------------------------------------------------------------------------------- /wav_evaluation/models/CLAPWrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/wav_evaluation/models/CLAPWrapper.py -------------------------------------------------------------------------------- /wav_evaluation/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/wav_evaluation/models/__init__.py -------------------------------------------------------------------------------- /wav_evaluation/models/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/wav_evaluation/models/audio.py -------------------------------------------------------------------------------- /wav_evaluation/models/clap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/wav_evaluation/models/clap.py -------------------------------------------------------------------------------- /wav_evaluation/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Text-to-Audio/Make-An-Audio/HEAD/wav_evaluation/models/utils.py --------------------------------------------------------------------------------