├── .dockerignore ├── .gitignore ├── LICENSE ├── README.md ├── ThinkSound ├── __init__.py ├── configs │ ├── model_configs │ │ ├── stable_audio_2_0_vae.json │ │ └── thinksound.json │ └── multimodal_dataset_demo.json ├── data │ ├── __init__.py │ ├── datamodule.py │ ├── dataset.py │ └── utils.py ├── inference │ ├── __init__.py │ ├── generation.py │ ├── sampling.py │ └── utils.py ├── models │ ├── __init__.py │ ├── autoencoders.py │ ├── blocks.py │ ├── bottleneck.py │ ├── codebook_patterns.py │ ├── conditioners.py │ ├── diffusion.py │ ├── dit.py │ ├── embeddings.py │ ├── factory.py │ ├── local_attention.py │ ├── mmdit.py │ ├── pretrained.py │ ├── pretransforms.py │ ├── transformer.py │ ├── transformer_layers.py │ └── utils.py └── training │ ├── __init__.py │ ├── autoencoders.py │ ├── diffusion.py │ ├── factory.py │ ├── losses │ ├── __init__.py │ ├── auraloss.py │ └── losses.py │ └── utils.py ├── app.py ├── cache_manager.py ├── cog.yaml ├── cog_predict.py ├── cot_vgg_demo_caption.txt ├── data_utils ├── ext │ └── synchformer │ │ ├── LICENSE │ │ ├── __init__.py │ │ ├── divided_224_16x4.yaml │ │ ├── motionformer.py │ │ ├── synchformer.py │ │ ├── utils.py │ │ ├── video_model_builder.py │ │ └── vit_helper.py └── v2a_utils │ ├── __init__.py │ ├── audio_text_dataset.py │ ├── audioset_224.py │ ├── audioset_video_224.py │ ├── feature_utils_224.py │ ├── vggsound.py │ ├── vggsound_224.py │ ├── vggsound_224_no_audio.py │ ├── vggsound_224_no_sync.py │ └── vggsound_text.py ├── defaults.ini ├── demo_test.csv ├── eval_batch.py ├── extract_latents.py ├── predict.py ├── pyproject.toml ├── requirements.txt ├── scripts ├── demo.sh ├── eval_batch.sh └── infer.sh ├── setup.py └── unwrap.py /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/.dockerignore -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/README.md -------------------------------------------------------------------------------- /ThinkSound/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/__init__.py -------------------------------------------------------------------------------- /ThinkSound/configs/model_configs/stable_audio_2_0_vae.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/configs/model_configs/stable_audio_2_0_vae.json -------------------------------------------------------------------------------- /ThinkSound/configs/model_configs/thinksound.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/configs/model_configs/thinksound.json -------------------------------------------------------------------------------- /ThinkSound/configs/multimodal_dataset_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/configs/multimodal_dataset_demo.json -------------------------------------------------------------------------------- /ThinkSound/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ThinkSound/data/datamodule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/data/datamodule.py -------------------------------------------------------------------------------- /ThinkSound/data/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/data/dataset.py -------------------------------------------------------------------------------- /ThinkSound/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/data/utils.py -------------------------------------------------------------------------------- /ThinkSound/inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ThinkSound/inference/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/inference/generation.py -------------------------------------------------------------------------------- /ThinkSound/inference/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/inference/sampling.py -------------------------------------------------------------------------------- /ThinkSound/inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/inference/utils.py -------------------------------------------------------------------------------- /ThinkSound/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/__init__.py -------------------------------------------------------------------------------- /ThinkSound/models/autoencoders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/autoencoders.py -------------------------------------------------------------------------------- /ThinkSound/models/blocks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/blocks.py -------------------------------------------------------------------------------- /ThinkSound/models/bottleneck.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/bottleneck.py -------------------------------------------------------------------------------- /ThinkSound/models/codebook_patterns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/codebook_patterns.py -------------------------------------------------------------------------------- /ThinkSound/models/conditioners.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/conditioners.py -------------------------------------------------------------------------------- /ThinkSound/models/diffusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/diffusion.py -------------------------------------------------------------------------------- /ThinkSound/models/dit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/dit.py -------------------------------------------------------------------------------- /ThinkSound/models/embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/embeddings.py -------------------------------------------------------------------------------- /ThinkSound/models/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/factory.py -------------------------------------------------------------------------------- /ThinkSound/models/local_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/local_attention.py -------------------------------------------------------------------------------- /ThinkSound/models/mmdit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/mmdit.py -------------------------------------------------------------------------------- /ThinkSound/models/pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/pretrained.py -------------------------------------------------------------------------------- /ThinkSound/models/pretransforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/pretransforms.py -------------------------------------------------------------------------------- /ThinkSound/models/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/transformer.py -------------------------------------------------------------------------------- /ThinkSound/models/transformer_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/transformer_layers.py -------------------------------------------------------------------------------- /ThinkSound/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/models/utils.py -------------------------------------------------------------------------------- /ThinkSound/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/training/__init__.py -------------------------------------------------------------------------------- /ThinkSound/training/autoencoders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/training/autoencoders.py -------------------------------------------------------------------------------- /ThinkSound/training/diffusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/training/diffusion.py -------------------------------------------------------------------------------- /ThinkSound/training/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/training/factory.py -------------------------------------------------------------------------------- /ThinkSound/training/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import * -------------------------------------------------------------------------------- /ThinkSound/training/losses/auraloss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/training/losses/auraloss.py -------------------------------------------------------------------------------- /ThinkSound/training/losses/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/training/losses/losses.py -------------------------------------------------------------------------------- /ThinkSound/training/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/ThinkSound/training/utils.py -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/app.py -------------------------------------------------------------------------------- /cache_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/cache_manager.py -------------------------------------------------------------------------------- /cog.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/cog.yaml -------------------------------------------------------------------------------- /cog_predict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/cog_predict.py -------------------------------------------------------------------------------- /cot_vgg_demo_caption.txt: -------------------------------------------------------------------------------- 1 | demo.npz -------------------------------------------------------------------------------- /data_utils/ext/synchformer/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/ext/synchformer/LICENSE -------------------------------------------------------------------------------- /data_utils/ext/synchformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/ext/synchformer/__init__.py -------------------------------------------------------------------------------- /data_utils/ext/synchformer/divided_224_16x4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/ext/synchformer/divided_224_16x4.yaml -------------------------------------------------------------------------------- /data_utils/ext/synchformer/motionformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/ext/synchformer/motionformer.py -------------------------------------------------------------------------------- /data_utils/ext/synchformer/synchformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/ext/synchformer/synchformer.py -------------------------------------------------------------------------------- /data_utils/ext/synchformer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/ext/synchformer/utils.py -------------------------------------------------------------------------------- /data_utils/ext/synchformer/video_model_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/ext/synchformer/video_model_builder.py -------------------------------------------------------------------------------- /data_utils/ext/synchformer/vit_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/ext/synchformer/vit_helper.py -------------------------------------------------------------------------------- /data_utils/v2a_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_utils/v2a_utils/audio_text_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/v2a_utils/audio_text_dataset.py -------------------------------------------------------------------------------- /data_utils/v2a_utils/audioset_224.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/v2a_utils/audioset_224.py -------------------------------------------------------------------------------- /data_utils/v2a_utils/audioset_video_224.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/v2a_utils/audioset_video_224.py -------------------------------------------------------------------------------- /data_utils/v2a_utils/feature_utils_224.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/v2a_utils/feature_utils_224.py -------------------------------------------------------------------------------- /data_utils/v2a_utils/vggsound.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/v2a_utils/vggsound.py -------------------------------------------------------------------------------- /data_utils/v2a_utils/vggsound_224.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/v2a_utils/vggsound_224.py -------------------------------------------------------------------------------- /data_utils/v2a_utils/vggsound_224_no_audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/v2a_utils/vggsound_224_no_audio.py -------------------------------------------------------------------------------- /data_utils/v2a_utils/vggsound_224_no_sync.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/v2a_utils/vggsound_224_no_sync.py -------------------------------------------------------------------------------- /data_utils/v2a_utils/vggsound_text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/data_utils/v2a_utils/vggsound_text.py -------------------------------------------------------------------------------- /defaults.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/defaults.ini -------------------------------------------------------------------------------- /demo_test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/demo_test.csv -------------------------------------------------------------------------------- /eval_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/eval_batch.py -------------------------------------------------------------------------------- /extract_latents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/extract_latents.py -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/predict.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/demo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/scripts/demo.sh -------------------------------------------------------------------------------- /scripts/eval_batch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/scripts/eval_batch.sh -------------------------------------------------------------------------------- /scripts/infer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/scripts/infer.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/setup.py -------------------------------------------------------------------------------- /unwrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zsxkib/cog-thinksound/HEAD/unwrap.py --------------------------------------------------------------------------------