├── .gitignore ├── LICENSE ├── LICENSES ├── LICENSE_ADP.txt ├── LICENSE_AEIOU.txt ├── LICENSE_AURALOSS.txt ├── LICENSE_DESCRIPT.txt ├── LICENSE_META.txt ├── LICENSE_NVIDIA.txt └── LICENSE_XTRANSFORMERS.txt ├── README.md ├── defaults.ini ├── docs ├── autoencoders.md ├── conditioning.md ├── datasets.md ├── diffusion.md ├── pre_encoding.md └── pretransforms.md ├── pre_encode.py ├── pyproject.toml ├── run_gradio.py ├── scripts └── ds_zero_to_pl_ckpt.py ├── setup.py ├── stable_audio_tools ├── __init__.py ├── configs │ ├── dataset_configs │ │ ├── custom_metadata │ │ │ └── custom_md_example.py │ │ ├── local_training_example.json │ │ └── s3_wds_example.json │ └── model_configs │ │ ├── autoencoders │ │ ├── dac_2048_32_vae.json │ │ ├── encodec_musicgen_rvq.json │ │ ├── stable_audio_1_0_vae.json │ │ └── stable_audio_2_0_vae.json │ │ ├── dance_diffusion │ │ ├── dance_diffusion_base.json │ │ ├── dance_diffusion_base_16k.json │ │ ├── dance_diffusion_base_44k.json │ │ └── dance_diffusion_large.json │ │ └── txt2audio │ │ ├── stable_audio_1_0.json │ │ └── stable_audio_2_0.json ├── data │ ├── __init__.py │ ├── dataset.py │ └── utils.py ├── inference │ ├── __init__.py │ ├── generation.py │ ├── sampling.py │ └── utils.py ├── interface │ ├── __init__.py │ ├── aeiou.py │ ├── gradio.py │ └── interfaces │ │ ├── __init__.py │ │ └── diffusion_cond.py ├── models │ ├── __init__.py │ ├── adp.py │ ├── arc.py │ ├── autoencoders.py │ ├── blocks.py │ ├── bottleneck.py │ ├── codebook_patterns.py │ ├── conditioners.py │ ├── convnext.py │ ├── diffusion.py │ ├── discriminators.py │ ├── dit.py │ ├── encodec.py │ ├── factory.py │ ├── fsq.py │ ├── inpainting.py │ ├── lm.py │ ├── lm_backbone.py │ ├── local_attention.py │ ├── pqmf.py │ ├── pretrained.py │ ├── pretransforms.py │ ├── transformer.py │ ├── utils.py │ └── wavelets.py └── training │ ├── __init__.py │ ├── arc.py │ ├── autoencoders.py │ ├── diffusion.py │ ├── factory.py │ ├── lm.py │ ├── losses │ ├── __init__.py │ ├── auraloss.py │ ├── losses.py │ ├── metrics.py │ ├── semantic.py │ └── utils.py │ └── utils.py ├── train.py └── unwrap_model.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/LICENSE -------------------------------------------------------------------------------- /LICENSES/LICENSE_ADP.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/LICENSES/LICENSE_ADP.txt -------------------------------------------------------------------------------- /LICENSES/LICENSE_AEIOU.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/LICENSES/LICENSE_AEIOU.txt -------------------------------------------------------------------------------- /LICENSES/LICENSE_AURALOSS.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/LICENSES/LICENSE_AURALOSS.txt -------------------------------------------------------------------------------- /LICENSES/LICENSE_DESCRIPT.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/LICENSES/LICENSE_DESCRIPT.txt -------------------------------------------------------------------------------- /LICENSES/LICENSE_META.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/LICENSES/LICENSE_META.txt -------------------------------------------------------------------------------- /LICENSES/LICENSE_NVIDIA.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/LICENSES/LICENSE_NVIDIA.txt -------------------------------------------------------------------------------- /LICENSES/LICENSE_XTRANSFORMERS.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/LICENSES/LICENSE_XTRANSFORMERS.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/README.md -------------------------------------------------------------------------------- /defaults.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/defaults.ini -------------------------------------------------------------------------------- /docs/autoencoders.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/docs/autoencoders.md -------------------------------------------------------------------------------- /docs/conditioning.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/docs/conditioning.md -------------------------------------------------------------------------------- /docs/datasets.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/docs/datasets.md -------------------------------------------------------------------------------- /docs/diffusion.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/docs/diffusion.md -------------------------------------------------------------------------------- /docs/pre_encoding.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/docs/pre_encoding.md -------------------------------------------------------------------------------- /docs/pretransforms.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/docs/pretransforms.md -------------------------------------------------------------------------------- /pre_encode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/pre_encode.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/pyproject.toml -------------------------------------------------------------------------------- /run_gradio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/run_gradio.py -------------------------------------------------------------------------------- /scripts/ds_zero_to_pl_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/scripts/ds_zero_to_pl_ckpt.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/setup.py -------------------------------------------------------------------------------- /stable_audio_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/__init__.py -------------------------------------------------------------------------------- /stable_audio_tools/configs/dataset_configs/custom_metadata/custom_md_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/dataset_configs/custom_metadata/custom_md_example.py -------------------------------------------------------------------------------- /stable_audio_tools/configs/dataset_configs/local_training_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/dataset_configs/local_training_example.json -------------------------------------------------------------------------------- /stable_audio_tools/configs/dataset_configs/s3_wds_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/dataset_configs/s3_wds_example.json -------------------------------------------------------------------------------- /stable_audio_tools/configs/model_configs/autoencoders/dac_2048_32_vae.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/model_configs/autoencoders/dac_2048_32_vae.json -------------------------------------------------------------------------------- /stable_audio_tools/configs/model_configs/autoencoders/encodec_musicgen_rvq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/model_configs/autoencoders/encodec_musicgen_rvq.json -------------------------------------------------------------------------------- /stable_audio_tools/configs/model_configs/autoencoders/stable_audio_1_0_vae.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/model_configs/autoencoders/stable_audio_1_0_vae.json -------------------------------------------------------------------------------- /stable_audio_tools/configs/model_configs/autoencoders/stable_audio_2_0_vae.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/model_configs/autoencoders/stable_audio_2_0_vae.json -------------------------------------------------------------------------------- /stable_audio_tools/configs/model_configs/dance_diffusion/dance_diffusion_base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/model_configs/dance_diffusion/dance_diffusion_base.json -------------------------------------------------------------------------------- /stable_audio_tools/configs/model_configs/dance_diffusion/dance_diffusion_base_16k.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/model_configs/dance_diffusion/dance_diffusion_base_16k.json -------------------------------------------------------------------------------- /stable_audio_tools/configs/model_configs/dance_diffusion/dance_diffusion_base_44k.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/model_configs/dance_diffusion/dance_diffusion_base_44k.json -------------------------------------------------------------------------------- /stable_audio_tools/configs/model_configs/dance_diffusion/dance_diffusion_large.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/model_configs/dance_diffusion/dance_diffusion_large.json -------------------------------------------------------------------------------- /stable_audio_tools/configs/model_configs/txt2audio/stable_audio_1_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/model_configs/txt2audio/stable_audio_1_0.json -------------------------------------------------------------------------------- /stable_audio_tools/configs/model_configs/txt2audio/stable_audio_2_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/configs/model_configs/txt2audio/stable_audio_2_0.json -------------------------------------------------------------------------------- /stable_audio_tools/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stable_audio_tools/data/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/data/dataset.py -------------------------------------------------------------------------------- /stable_audio_tools/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/data/utils.py -------------------------------------------------------------------------------- /stable_audio_tools/inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stable_audio_tools/inference/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/inference/generation.py -------------------------------------------------------------------------------- /stable_audio_tools/inference/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/inference/sampling.py -------------------------------------------------------------------------------- /stable_audio_tools/inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/inference/utils.py -------------------------------------------------------------------------------- /stable_audio_tools/interface/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stable_audio_tools/interface/aeiou.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/interface/aeiou.py -------------------------------------------------------------------------------- /stable_audio_tools/interface/gradio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/interface/gradio.py -------------------------------------------------------------------------------- /stable_audio_tools/interface/interfaces/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stable_audio_tools/interface/interfaces/diffusion_cond.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/interface/interfaces/diffusion_cond.py -------------------------------------------------------------------------------- /stable_audio_tools/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/__init__.py -------------------------------------------------------------------------------- /stable_audio_tools/models/adp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/adp.py -------------------------------------------------------------------------------- /stable_audio_tools/models/arc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/arc.py -------------------------------------------------------------------------------- /stable_audio_tools/models/autoencoders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/autoencoders.py -------------------------------------------------------------------------------- /stable_audio_tools/models/blocks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/blocks.py -------------------------------------------------------------------------------- /stable_audio_tools/models/bottleneck.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/bottleneck.py -------------------------------------------------------------------------------- /stable_audio_tools/models/codebook_patterns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/codebook_patterns.py -------------------------------------------------------------------------------- /stable_audio_tools/models/conditioners.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/conditioners.py -------------------------------------------------------------------------------- /stable_audio_tools/models/convnext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/convnext.py -------------------------------------------------------------------------------- /stable_audio_tools/models/diffusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/diffusion.py -------------------------------------------------------------------------------- /stable_audio_tools/models/discriminators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/discriminators.py -------------------------------------------------------------------------------- /stable_audio_tools/models/dit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/dit.py -------------------------------------------------------------------------------- /stable_audio_tools/models/encodec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/encodec.py -------------------------------------------------------------------------------- /stable_audio_tools/models/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/factory.py -------------------------------------------------------------------------------- /stable_audio_tools/models/fsq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/fsq.py -------------------------------------------------------------------------------- /stable_audio_tools/models/inpainting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/inpainting.py -------------------------------------------------------------------------------- /stable_audio_tools/models/lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/lm.py -------------------------------------------------------------------------------- /stable_audio_tools/models/lm_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/lm_backbone.py -------------------------------------------------------------------------------- /stable_audio_tools/models/local_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/local_attention.py -------------------------------------------------------------------------------- /stable_audio_tools/models/pqmf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/pqmf.py -------------------------------------------------------------------------------- /stable_audio_tools/models/pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/pretrained.py -------------------------------------------------------------------------------- /stable_audio_tools/models/pretransforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/pretransforms.py -------------------------------------------------------------------------------- /stable_audio_tools/models/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/transformer.py -------------------------------------------------------------------------------- /stable_audio_tools/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/utils.py -------------------------------------------------------------------------------- /stable_audio_tools/models/wavelets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/models/wavelets.py -------------------------------------------------------------------------------- /stable_audio_tools/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/__init__.py -------------------------------------------------------------------------------- /stable_audio_tools/training/arc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/arc.py -------------------------------------------------------------------------------- /stable_audio_tools/training/autoencoders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/autoencoders.py -------------------------------------------------------------------------------- /stable_audio_tools/training/diffusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/diffusion.py -------------------------------------------------------------------------------- /stable_audio_tools/training/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/factory.py -------------------------------------------------------------------------------- /stable_audio_tools/training/lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/lm.py -------------------------------------------------------------------------------- /stable_audio_tools/training/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/losses/__init__.py -------------------------------------------------------------------------------- /stable_audio_tools/training/losses/auraloss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/losses/auraloss.py -------------------------------------------------------------------------------- /stable_audio_tools/training/losses/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/losses/losses.py -------------------------------------------------------------------------------- /stable_audio_tools/training/losses/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/losses/metrics.py -------------------------------------------------------------------------------- /stable_audio_tools/training/losses/semantic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/losses/semantic.py -------------------------------------------------------------------------------- /stable_audio_tools/training/losses/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/losses/utils.py -------------------------------------------------------------------------------- /stable_audio_tools/training/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/stable_audio_tools/training/utils.py -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/train.py -------------------------------------------------------------------------------- /unwrap_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Stability-AI/stable-audio-tools/HEAD/unwrap_model.py --------------------------------------------------------------------------------