├── .DS_Store ├── README.md ├── lass_config └── 2channel_flow.yaml ├── lass_inference.py ├── lass_result ├── exp1_A rocket flies by followed by a loud explosion and fire crackling as a truck engine runs idle_mixture.wav └── mixed │ └── exp1_A rocket flies by followed by a loud explosion and fire crackling as a truck engine runs idle_mixture.wav ├── metadata-master ├── .gitignore ├── processed │ ├── audiocaps_eval.json │ ├── audiocaps_test.json │ ├── audiocaps_train.json │ ├── class_labels_indices.csv │ └── dataset_root.json └── test_exist.py ├── src ├── .DS_Store ├── __init__.py ├── __pycache__ │ └── __init__.cpython-39.pyc ├── bigvgan │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── model.cpython-39.pyc │ │ ├── models.cpython-38.pyc │ │ ├── models.cpython-39.pyc │ │ ├── models_v2.cpython-38.pyc │ │ └── models_v2.cpython-39.pyc │ ├── config.json │ ├── g_01000000 │ └── model.py ├── diffusers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── configuration_utils.cpython-38.pyc │ │ ├── image_processor.cpython-38.pyc │ │ ├── loaders.cpython-38.pyc │ │ ├── optimization.cpython-38.pyc │ │ └── training_utils.cpython-38.pyc │ ├── commands │ │ ├── __init__.py │ │ ├── diffusers_cli.py │ │ └── env.py │ ├── configuration_utils.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── experimental │ │ ├── README.md │ │ ├── __init__.py │ │ └── rl │ │ │ ├── __init__.py │ │ │ └── value_guided_sampling.py │ ├── image_processor.py │ ├── loaders.py │ ├── models │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── attention.cpython-38.pyc │ │ │ ├── attention_processor.cpython-38.pyc │ │ │ ├── autoencoder_kl.cpython-38.pyc │ │ │ ├── controlnet.cpython-38.pyc │ │ │ ├── dual_transformer_2d.cpython-38.pyc │ │ │ ├── embeddings.cpython-38.pyc │ │ │ ├── modeling_utils.cpython-38.pyc │ │ │ ├── prior_transformer.cpython-38.pyc │ │ │ ├── resnet.cpython-38.pyc │ │ │ ├── t5_film_transformer.cpython-38.pyc │ │ │ ├── transformer_2d.cpython-38.pyc │ │ │ ├── transformer_temporal.cpython-38.pyc │ │ │ ├── unet_1d.cpython-38.pyc │ │ │ ├── unet_1d_blocks.cpython-38.pyc │ │ │ ├── unet_2d.cpython-38.pyc │ │ │ ├── unet_2d_blocks.cpython-38.pyc │ │ │ ├── unet_2d_condition.cpython-38.pyc │ │ │ ├── unet_3d_blocks.cpython-38.pyc │ │ │ ├── unet_3d_condition.cpython-38.pyc │ │ │ ├── vae.cpython-38.pyc │ │ │ └── vq_model.cpython-38.pyc │ │ ├── attention.py │ │ ├── attention_flax.py │ │ ├── attention_processor.py │ │ ├── autoencoder_kl.py │ │ ├── controlnet.py │ │ ├── controlnet_flax.py │ │ ├── cross_attention.py │ │ ├── dual_transformer_2d.py │ │ ├── embeddings.py │ │ ├── embeddings_flax.py │ │ ├── modeling_flax_pytorch_utils.py │ │ ├── modeling_flax_utils.py │ │ ├── modeling_pytorch_flax_utils.py │ │ ├── modeling_utils.py │ │ ├── prior_transformer.py │ │ ├── resnet.py │ │ ├── resnet_flax.py │ │ ├── t5_film_transformer.py │ │ ├── transformer_2d.py │ │ ├── transformer_temporal.py │ │ ├── unet_1d.py │ │ ├── unet_1d_blocks.py │ │ ├── unet_2d.py │ │ ├── unet_2d_blocks.py │ │ ├── unet_2d_blocks_flax.py │ │ ├── unet_2d_condition.py │ │ ├── unet_2d_condition_flax.py │ │ ├── unet_3d_blocks.py │ │ ├── unet_3d_condition.py │ │ ├── vae.py │ │ ├── vae_flax.py │ │ └── vq_model.py │ ├── optimization.py │ ├── pipeline_utils.py │ ├── pipelines │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── pipeline_utils.cpython-38.pyc │ │ ├── alt_diffusion │ │ │ 
├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── modeling_roberta_series.cpython-38.pyc │ │ │ │ ├── pipeline_alt_diffusion.cpython-38.pyc │ │ │ │ └── pipeline_alt_diffusion_img2img.cpython-38.pyc │ │ │ ├── modeling_roberta_series.py │ │ │ ├── pipeline_alt_diffusion.py │ │ │ └── pipeline_alt_diffusion_img2img.py │ │ ├── audio_diffusion │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── mel.cpython-38.pyc │ │ │ │ └── pipeline_audio_diffusion.cpython-38.pyc │ │ │ ├── mel.py │ │ │ └── pipeline_audio_diffusion.py │ │ ├── audioldm │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── pipeline_audioldm.cpython-38.pyc │ │ │ └── pipeline_audioldm.py │ │ ├── controlnet │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── multicontrolnet.cpython-38.pyc │ │ │ │ ├── pipeline_controlnet.cpython-38.pyc │ │ │ │ ├── pipeline_controlnet_img2img.cpython-38.pyc │ │ │ │ └── pipeline_controlnet_inpaint.cpython-38.pyc │ │ │ ├── multicontrolnet.py │ │ │ ├── pipeline_controlnet.py │ │ │ ├── pipeline_controlnet_img2img.py │ │ │ ├── pipeline_controlnet_inpaint.py │ │ │ └── pipeline_flax_controlnet.py │ │ ├── dance_diffusion │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── pipeline_dance_diffusion.cpython-38.pyc │ │ │ └── pipeline_dance_diffusion.py │ │ ├── ddim │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── pipeline_ddim.cpython-38.pyc │ │ │ └── pipeline_ddim.py │ │ ├── ddpm │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── pipeline_ddpm.cpython-38.pyc │ │ │ └── pipeline_ddpm.py │ │ ├── deepfloyd_if │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── pipeline_if.cpython-38.pyc │ │ │ │ ├── pipeline_if_img2img.cpython-38.pyc │ │ │ │ ├── pipeline_if_img2img_superresolution.cpython-38.pyc │ │ │ │ ├── pipeline_if_inpainting.cpython-38.pyc │ │ │ │ ├── pipeline_if_inpainting_superresolution.cpython-38.pyc │ │ │ │ ├── pipeline_if_superresolution.cpython-38.pyc │ │ │ │ ├── safety_checker.cpython-38.pyc │ │ │ │ ├── timesteps.cpython-38.pyc │ │ │ │ └── watermark.cpython-38.pyc │ │ │ ├── pipeline_if.py │ │ │ ├── pipeline_if_img2img.py │ │ │ ├── pipeline_if_img2img_superresolution.py │ │ │ ├── pipeline_if_inpainting.py │ │ │ ├── pipeline_if_inpainting_superresolution.py │ │ │ ├── pipeline_if_superresolution.py │ │ │ ├── safety_checker.py │ │ │ ├── timesteps.py │ │ │ └── watermark.py │ │ ├── dit │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── pipeline_dit.cpython-38.pyc │ │ │ └── pipeline_dit.py │ │ ├── kandinsky │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── pipeline_kandinsky.cpython-38.pyc │ │ │ │ ├── pipeline_kandinsky_img2img.cpython-38.pyc │ │ │ │ ├── pipeline_kandinsky_inpaint.cpython-38.pyc │ │ │ │ ├── pipeline_kandinsky_prior.cpython-38.pyc │ │ │ │ └── text_encoder.cpython-38.pyc │ │ │ ├── pipeline_kandinsky.py │ │ │ ├── pipeline_kandinsky_img2img.py │ │ │ ├── pipeline_kandinsky_inpaint.py │ │ │ ├── pipeline_kandinsky_prior.py │ │ │ └── text_encoder.py │ │ ├── latent_diffusion │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── pipeline_latent_diffusion.cpython-38.pyc │ │ │ │ └── pipeline_latent_diffusion_superresolution.cpython-38.pyc │ │ │ ├── pipeline_latent_diffusion.py │ 
│ │ └── pipeline_latent_diffusion_superresolution.py │ │ ├── latent_diffusion_uncond │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── pipeline_latent_diffusion_uncond.cpython-38.pyc │ │ │ └── pipeline_latent_diffusion_uncond.py │ │ ├── onnx_utils.py │ │ ├── paint_by_example │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── image_encoder.cpython-38.pyc │ │ │ │ └── pipeline_paint_by_example.cpython-38.pyc │ │ │ ├── image_encoder.py │ │ │ └── pipeline_paint_by_example.py │ │ ├── pipeline_flax_utils.py │ │ ├── pipeline_utils.py │ │ ├── pndm │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── pipeline_pndm.cpython-38.pyc │ │ │ └── pipeline_pndm.py │ │ ├── repaint │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── pipeline_repaint.cpython-38.pyc │ │ │ └── pipeline_repaint.py │ │ ├── score_sde_ve │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── pipeline_score_sde_ve.cpython-38.pyc │ │ │ └── pipeline_score_sde_ve.py │ │ ├── semantic_stable_diffusion │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── pipeline_semantic_stable_diffusion.cpython-38.pyc │ │ │ └── pipeline_semantic_stable_diffusion.py │ │ ├── spectrogram_diffusion │ │ │ ├── __init__.py │ │ │ ├── continous_encoder.py │ │ │ ├── midi_utils.py │ │ │ ├── notes_encoder.py │ │ │ └── pipeline_spectrogram_diffusion.py │ │ ├── stable_diffusion │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── pipeline_cycle_diffusion.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_attend_and_excite.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_depth2img.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_diffedit.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_image_variation.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_img2img.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_inpaint.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_inpaint_legacy.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_instruct_pix2pix.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_latent_upscale.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_model_editing.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_panorama.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_pix2pix_zero.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_sag.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_upscale.cpython-38.pyc │ │ │ │ ├── pipeline_stable_unclip.cpython-38.pyc │ │ │ │ ├── pipeline_stable_unclip_img2img.cpython-38.pyc │ │ │ │ ├── safety_checker.cpython-38.pyc │ │ │ │ └── stable_unclip_image_normalizer.cpython-38.pyc │ │ │ ├── convert_from_ckpt.py │ │ │ ├── pipeline_cycle_diffusion.py │ │ │ ├── pipeline_flax_stable_diffusion.py │ │ │ ├── pipeline_flax_stable_diffusion_controlnet.py │ │ │ ├── pipeline_flax_stable_diffusion_img2img.py │ │ │ ├── pipeline_flax_stable_diffusion_inpaint.py │ │ │ ├── pipeline_onnx_stable_diffusion.py │ │ │ ├── pipeline_onnx_stable_diffusion_img2img.py │ │ │ ├── pipeline_onnx_stable_diffusion_inpaint.py │ │ │ ├── pipeline_onnx_stable_diffusion_inpaint_legacy.py │ │ │ ├── pipeline_onnx_stable_diffusion_upscale.py │ │ │ ├── pipeline_stable_diffusion.py │ │ │ ├── pipeline_stable_diffusion_attend_and_excite.py │ │ │ ├── pipeline_stable_diffusion_controlnet.py 
│ │ │ ├── pipeline_stable_diffusion_depth2img.py │ │ │ ├── pipeline_stable_diffusion_diffedit.py │ │ │ ├── pipeline_stable_diffusion_image_variation.py │ │ │ ├── pipeline_stable_diffusion_img2img.py │ │ │ ├── pipeline_stable_diffusion_inpaint.py │ │ │ ├── pipeline_stable_diffusion_inpaint_legacy.py │ │ │ ├── pipeline_stable_diffusion_instruct_pix2pix.py │ │ │ ├── pipeline_stable_diffusion_k_diffusion.py │ │ │ ├── pipeline_stable_diffusion_latent_upscale.py │ │ │ ├── pipeline_stable_diffusion_model_editing.py │ │ │ ├── pipeline_stable_diffusion_panorama.py │ │ │ ├── pipeline_stable_diffusion_pix2pix_zero.py │ │ │ ├── pipeline_stable_diffusion_sag.py │ │ │ ├── pipeline_stable_diffusion_upscale.py │ │ │ ├── pipeline_stable_unclip.py │ │ │ ├── pipeline_stable_unclip_img2img.py │ │ │ ├── safety_checker.py │ │ │ ├── safety_checker_flax.py │ │ │ └── stable_unclip_image_normalizer.py │ │ ├── stable_diffusion_safe │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── pipeline_stable_diffusion_safe.cpython-38.pyc │ │ │ │ └── safety_checker.cpython-38.pyc │ │ │ ├── pipeline_stable_diffusion_safe.py │ │ │ └── safety_checker.py │ │ ├── stochastic_karras_ve │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── pipeline_stochastic_karras_ve.cpython-38.pyc │ │ │ └── pipeline_stochastic_karras_ve.py │ │ ├── text_to_video_synthesis │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── pipeline_text_to_video_synth.cpython-38.pyc │ │ │ │ └── pipeline_text_to_video_zero.cpython-38.pyc │ │ │ ├── pipeline_text_to_video_synth.py │ │ │ └── pipeline_text_to_video_zero.py │ │ ├── unclip │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── pipeline_unclip.cpython-38.pyc │ │ │ │ ├── pipeline_unclip_image_variation.cpython-38.pyc │ │ │ │ └── text_proj.cpython-38.pyc │ │ │ ├── pipeline_unclip.py │ │ │ ├── pipeline_unclip_image_variation.py │ │ │ └── text_proj.py │ │ ├── unidiffuser │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── modeling_text_decoder.cpython-38.pyc │ │ │ │ ├── modeling_uvit.cpython-38.pyc │ │ │ │ └── pipeline_unidiffuser.cpython-38.pyc │ │ │ ├── modeling_text_decoder.py │ │ │ ├── modeling_uvit.py │ │ │ └── pipeline_unidiffuser.py │ │ ├── versatile_diffusion │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── modeling_text_unet.cpython-38.pyc │ │ │ │ ├── pipeline_versatile_diffusion.cpython-38.pyc │ │ │ │ ├── pipeline_versatile_diffusion_dual_guided.cpython-38.pyc │ │ │ │ ├── pipeline_versatile_diffusion_image_variation.cpython-38.pyc │ │ │ │ └── pipeline_versatile_diffusion_text_to_image.cpython-38.pyc │ │ │ ├── modeling_text_unet.py │ │ │ ├── pipeline_versatile_diffusion.py │ │ │ ├── pipeline_versatile_diffusion_dual_guided.py │ │ │ ├── pipeline_versatile_diffusion_image_variation.py │ │ │ └── pipeline_versatile_diffusion_text_to_image.py │ │ └── vq_diffusion │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── pipeline_vq_diffusion.cpython-38.pyc │ │ │ └── pipeline_vq_diffusion.py │ ├── schedulers │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── scheduling_ddim.cpython-38.pyc │ │ │ ├── scheduling_ddim_inverse.cpython-38.pyc │ │ │ ├── scheduling_ddpm.cpython-38.pyc │ │ │ ├── scheduling_deis_multistep.cpython-38.pyc │ │ │ ├── scheduling_dpmsolver_multistep.cpython-38.pyc │ │ │ ├── 
scheduling_dpmsolver_multistep_inverse.cpython-38.pyc │ │ │ ├── scheduling_dpmsolver_singlestep.cpython-38.pyc │ │ │ ├── scheduling_euler_ancestral_discrete.cpython-38.pyc │ │ │ ├── scheduling_euler_discrete.cpython-38.pyc │ │ │ ├── scheduling_heun_discrete.cpython-38.pyc │ │ │ ├── scheduling_ipndm.cpython-38.pyc │ │ │ ├── scheduling_k_dpm_2_ancestral_discrete.cpython-38.pyc │ │ │ ├── scheduling_k_dpm_2_discrete.cpython-38.pyc │ │ │ ├── scheduling_karras_ve.cpython-38.pyc │ │ │ ├── scheduling_lms_discrete.cpython-38.pyc │ │ │ ├── scheduling_pndm.cpython-38.pyc │ │ │ ├── scheduling_repaint.cpython-38.pyc │ │ │ ├── scheduling_sde_ve.cpython-38.pyc │ │ │ ├── scheduling_sde_vp.cpython-38.pyc │ │ │ ├── scheduling_unclip.cpython-38.pyc │ │ │ ├── scheduling_unipc_multistep.cpython-38.pyc │ │ │ ├── scheduling_utils.cpython-38.pyc │ │ │ └── scheduling_vq_diffusion.cpython-38.pyc │ │ ├── scheduling_ddim.py │ │ ├── scheduling_ddim_flax.py │ │ ├── scheduling_ddim_inverse.py │ │ ├── scheduling_ddpm.py │ │ ├── scheduling_ddpm_flax.py │ │ ├── scheduling_deis_multistep.py │ │ ├── scheduling_dpmsolver_multistep.py │ │ ├── scheduling_dpmsolver_multistep_flax.py │ │ ├── scheduling_dpmsolver_multistep_inverse.py │ │ ├── scheduling_dpmsolver_sde.py │ │ ├── scheduling_dpmsolver_singlestep.py │ │ ├── scheduling_euler_ancestral_discrete.py │ │ ├── scheduling_euler_discrete.py │ │ ├── scheduling_heun_discrete.py │ │ ├── scheduling_ipndm.py │ │ ├── scheduling_k_dpm_2_ancestral_discrete.py │ │ ├── scheduling_k_dpm_2_discrete.py │ │ ├── scheduling_karras_ve.py │ │ ├── scheduling_karras_ve_flax.py │ │ ├── scheduling_lms_discrete.py │ │ ├── scheduling_lms_discrete_flax.py │ │ ├── scheduling_pndm.py │ │ ├── scheduling_pndm_flax.py │ │ ├── scheduling_repaint.py │ │ ├── scheduling_sde_ve.py │ │ ├── scheduling_sde_ve_flax.py │ │ ├── scheduling_sde_vp.py │ │ ├── scheduling_unclip.py │ │ ├── scheduling_unipc_multistep.py │ │ ├── scheduling_utils.py │ │ ├── scheduling_utils_flax.py │ │ └── scheduling_vq_diffusion.py │ ├── training_utils.py │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── accelerate_utils.cpython-38.pyc │ │ ├── constants.cpython-38.pyc │ │ ├── deprecation_utils.cpython-38.pyc │ │ ├── doc_utils.cpython-38.pyc │ │ ├── dummy_flax_and_transformers_objects.cpython-38.pyc │ │ ├── dummy_flax_objects.cpython-38.pyc │ │ ├── dummy_note_seq_objects.cpython-38.pyc │ │ ├── dummy_onnx_objects.cpython-38.pyc │ │ ├── dummy_torch_and_torchsde_objects.cpython-38.pyc │ │ ├── dummy_torch_and_transformers_and_k_diffusion_objects.cpython-38.pyc │ │ ├── dummy_torch_and_transformers_and_onnx_objects.cpython-38.pyc │ │ ├── dummy_transformers_and_torch_and_note_seq_objects.cpython-38.pyc │ │ ├── dynamic_modules_utils.cpython-38.pyc │ │ ├── hub_utils.cpython-38.pyc │ │ ├── import_utils.cpython-38.pyc │ │ ├── logging.cpython-38.pyc │ │ ├── outputs.cpython-38.pyc │ │ ├── pil_utils.cpython-38.pyc │ │ ├── testing_utils.cpython-38.pyc │ │ └── torch_utils.cpython-38.pyc │ │ ├── accelerate_utils.py │ │ ├── constants.py │ │ ├── deprecation_utils.py │ │ ├── doc_utils.py │ │ ├── dummy_flax_and_transformers_objects.py │ │ ├── dummy_flax_objects.py │ │ ├── dummy_note_seq_objects.py │ │ ├── dummy_onnx_objects.py │ │ ├── dummy_pt_objects.py │ │ ├── dummy_torch_and_librosa_objects.py │ │ ├── dummy_torch_and_scipy_objects.py │ │ ├── dummy_torch_and_torchsde_objects.py │ │ ├── dummy_torch_and_transformers_and_k_diffusion_objects.py │ │ ├── dummy_torch_and_transformers_and_onnx_objects.py │ │ ├── 
dummy_torch_and_transformers_objects.py │ │ ├── dummy_transformers_and_torch_and_note_seq_objects.py │ │ ├── dynamic_modules_utils.py │ │ ├── hub_utils.py │ │ ├── import_utils.py │ │ ├── logging.py │ │ ├── model_card_template.md │ │ ├── outputs.py │ │ ├── pil_utils.py │ │ ├── testing_utils.py │ │ └── torch_utils.py ├── latent_diffusion │ ├── .DS_Store │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── util.cpython-38.pyc │ │ └── util.cpython-39.pyc │ ├── lr_scheduler.py │ ├── models │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── ddim.cpython-38.pyc │ │ │ ├── ddim.cpython-39.pyc │ │ │ ├── ddpm.cpython-38.pyc │ │ │ ├── ddpm.cpython-39.pyc │ │ │ ├── ddpm_flow.cpython-39.pyc │ │ │ ├── plms.cpython-38.pyc │ │ │ └── plms.cpython-39.pyc │ │ ├── ddpm_flow.py │ │ ├── dpm_solver │ │ │ ├── __init__.py │ │ │ ├── dpm_solver.py │ │ │ └── sampler.py │ │ └── plms.py │ ├── modules │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── attention.cpython-38.pyc │ │ │ ├── attention.cpython-39.pyc │ │ │ ├── ema.cpython-38.pyc │ │ │ └── ema.cpython-39.pyc │ │ ├── attention.py │ │ ├── audiomae │ │ │ ├── AudioMAE.py │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── AudioMAE.cpython-38.pyc │ │ │ │ ├── AudioMAE.cpython-39.pyc │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── __init__.cpython-39.pyc │ │ │ │ ├── models_mae.cpython-38.pyc │ │ │ │ ├── models_mae.cpython-39.pyc │ │ │ │ ├── models_vit.cpython-38.pyc │ │ │ │ └── models_vit.cpython-39.pyc │ │ │ ├── audiovisual_dataset.py │ │ │ ├── example.py │ │ │ ├── models_mae.py │ │ │ ├── models_vit.py │ │ │ ├── requirements.txt │ │ │ └── util │ │ │ │ ├── __pycache__ │ │ │ │ ├── patch_embed.cpython-38.pyc │ │ │ │ ├── patch_embed.cpython-39.pyc │ │ │ │ ├── pos_embed.cpython-38.pyc │ │ │ │ └── pos_embed.cpython-39.pyc │ │ │ │ ├── crop.py │ │ │ │ ├── datasets.py │ │ │ │ ├── lars.py │ │ │ │ ├── lr_decay.py │ │ │ │ ├── lr_sched.py │ │ │ │ ├── misc.py │ │ │ │ ├── patch_embed.py │ │ │ │ ├── pos_embed.py │ │ │ │ └── stat.py │ │ ├── diffusers_unet.py │ │ ├── diffusionmodules │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── __init__.cpython-39.pyc │ │ │ │ ├── model.cpython-38.pyc │ │ │ │ ├── model.cpython-39.pyc │ │ │ │ ├── openaimodel.cpython-38.pyc │ │ │ │ ├── openaimodel.cpython-39.pyc │ │ │ │ ├── util.cpython-38.pyc │ │ │ │ └── util.cpython-39.pyc │ │ │ ├── model.py │ │ │ ├── openaimodel.py │ │ │ ├── openaimodel_new.py │ │ │ └── util.py │ │ ├── distributions │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── __init__.cpython-39.pyc │ │ │ │ ├── distributions.cpython-38.pyc │ │ │ │ └── distributions.cpython-39.pyc │ │ │ └── distributions.py │ │ ├── dprnn.py │ │ ├── dprtnet.py │ │ ├── dptnet.py │ │ ├── ema.py │ │ ├── encoders │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── __init__.cpython-39.pyc │ │ │ │ ├── modules.cpython-38.pyc │ │ │ │ └── modules.cpython-39.pyc │ │ │ └── modules.py │ │ ├── losses │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── __init__.cpython-39.pyc │ │ │ │ ├── contperceptual.cpython-38.pyc │ │ │ │ ├── contperceptual.cpython-39.pyc │ │ │ │ ├── waveform_contperceptual.cpython-38.pyc │ │ │ │ ├── waveform_contperceptual.cpython-39.pyc │ │ │ │ ├── 
waveform_contperceptual_panns.cpython-38.pyc │ │ │ │ └── waveform_contperceptual_panns.cpython-39.pyc │ │ │ ├── contperceptual.py │ │ │ ├── panns_distance │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ ├── __init__.cpython-39.pyc │ │ │ │ │ ├── distance.cpython-38.pyc │ │ │ │ │ └── distance.cpython-39.pyc │ │ │ │ ├── distance.py │ │ │ │ └── model │ │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── models.cpython-38.pyc │ │ │ │ │ ├── models.cpython-39.pyc │ │ │ │ │ ├── pytorch_utils.cpython-38.pyc │ │ │ │ │ └── pytorch_utils.cpython-39.pyc │ │ │ │ │ ├── config.py │ │ │ │ │ ├── evaluate.py │ │ │ │ │ ├── finetune_template.py │ │ │ │ │ ├── inference.py │ │ │ │ │ ├── losses.py │ │ │ │ │ ├── main.py │ │ │ │ │ ├── models.py │ │ │ │ │ ├── pytorch_utils.py │ │ │ │ │ └── utilities.py │ │ │ ├── vqperceptual.py │ │ │ ├── waveform_contperceptual.py │ │ │ └── waveform_contperceptual_panns.py │ │ ├── nn.py │ │ ├── phoneme_encoder │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── __init__.cpython-39.pyc │ │ │ │ ├── attentions.cpython-38.pyc │ │ │ │ ├── attentions.cpython-39.pyc │ │ │ │ ├── commons.cpython-38.pyc │ │ │ │ ├── commons.cpython-39.pyc │ │ │ │ ├── encoder.cpython-38.pyc │ │ │ │ └── encoder.cpython-39.pyc │ │ │ ├── attentions.py │ │ │ ├── commons.py │ │ │ └── encoder.py │ │ └── x_transformer.py │ └── util.py ├── latent_encoder │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── activations.cpython-38.pyc │ │ ├── autoencoder.cpython-38.pyc │ │ ├── autoencoder.cpython-39.pyc │ │ ├── drumencoder.cpython-38.pyc │ │ └── wavencoder.cpython-38.pyc │ ├── activations.py │ ├── alias_free_torch │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── act.cpython-38.pyc │ │ │ ├── act.cpython-39.pyc │ │ │ ├── filter.cpython-38.pyc │ │ │ ├── filter.cpython-39.pyc │ │ │ ├── resample.cpython-38.pyc │ │ │ └── resample.cpython-39.pyc │ │ ├── act.py │ │ ├── filter.py │ │ └── resample.py │ ├── autoencoder.py │ └── wavedecoder │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── decoder.cpython-38.pyc │ │ └── decoder.cpython-39.pyc │ │ └── decoder.py └── utilities │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-38.pyc │ ├── __init__.cpython-39.pyc │ ├── model.cpython-38.pyc │ ├── model.cpython-39.pyc │ ├── tools.cpython-38.pyc │ └── tools.cpython-39.pyc │ ├── audio │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── audio_processing.cpython-38.pyc │ │ ├── audio_processing.cpython-39.pyc │ │ ├── stft.cpython-38.pyc │ │ ├── stft.cpython-39.pyc │ │ ├── tools.cpython-38.pyc │ │ └── tools.cpython-39.pyc │ ├── audio_processing.py │ ├── stft.py │ └── tools.py │ ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── add_on.cpython-38.pyc │ │ ├── add_on.cpython-39.pyc │ │ ├── big_vgan_mel.cpython-38.pyc │ │ ├── big_vgan_mel.cpython-39.pyc │ │ ├── dataset.cpython-38.pyc │ │ └── dataset.cpython-39.pyc │ ├── add_on.py │ ├── big_vgan_mel.py │ └── dataset.py │ ├── model.py │ ├── sampler.py │ └── tools.py ├── taming └── modules │ └── autoencoder │ └── lpips │ └── vgg.pth ├── train_latent_diffusion.py └── val_latent_diffusion.py /.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/.DS_Store
--------------------------------------------------------------------------------
/lass_result/exp1_A rocket flies by followed by a loud explosion and fire crackling as a truck engine runs idle_mixture.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/lass_result/exp1_A rocket flies by followed by a loud explosion and fire crackling as a truck engine runs idle_mixture.wav
--------------------------------------------------------------------------------
/lass_result/mixed/exp1_A rocket flies by followed by a loud explosion and fire crackling as a truck engine runs idle_mixture.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/lass_result/mixed/exp1_A rocket flies by followed by a loud explosion and fire crackling as a truck engine runs idle_mixture.wav
--------------------------------------------------------------------------------
/metadata-master/.gitignore:
--------------------------------------------------------------------------------
*.wav
unprocessed
data
*.tar
*copy
--------------------------------------------------------------------------------
/metadata-master/processed/dataset_root.json:
--------------------------------------------------------------------------------
{
    "metadata": {
        "path": {
            "audiocaps": {
                "train": "metadata-master/processed/audiocaps_train.json",
                "test": "metadata-master/processed/audiocaps_test.json",
                "val": "metadata-master/processed/audiocaps_eval.json",
                "class_label_indices": "metadata-master/processed/class_labels_indices.csv"
            }
        }
    }
}
--------------------------------------------------------------------------------
/metadata-master/test_exist.py:
--------------------------------------------------------------------------------
import os
import json

from tqdm import tqdm

# Metadata file whose "wav" entries are checked (one JSON object per line).
jsonpath = "/mnt/bn/arnold-yy-audiodata/audioldm/metadata-master/processed/retrival_trainable/retrival_trainable_train_len50.json"

jsondata = [json.loads(line) for line in open(jsonpath, 'r')]

# New root directory that replaces the original path prefix of each wav file.
new_root = "/mnt/bn/lqhaoheliu/datasets/audiocaps/audios/train/"

jsonlen = len(jsondata)


# fw = open("/mnt/bn/arnold-yy-audiodata/audioldm/metadata-master/processed/retrival_trainable/retrival_trainable_train_len50.json", 'w', encoding='utf-8')
count = 0
for i in tqdm(range(len(jsondata))):
    each = jsondata[i]
    wav = each["wav"]
    # Swap the hard-coded old prefix (first 71 characters) for the new root.
    wav = new_root + wav[71:]

    if os.path.exists(wav):
        # label = each["label"]
        # caption = each["caption"]
        # score_list = each["score_list"]

        # new_writen = {
        #     "wav": wav,
        #     "label": label,
        #     "caption": caption,
        #     "score_list": score_list
        # }

        # json.dump(new_writen, fw)
        # fw.write("\n")

        count += 1

print(f"the overall length is {jsonlen} and the number of existing files is {count}")
--------------------------------------------------------------------------------
/src/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/.DS_Store
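Note on the two metadata files above: dataset_root.json is the registry the data pipeline reads to locate the AudioCaps split files, and test_exist.py is a standalone sanity check for one such JSON-lines metadata file. The sketch below shows how the registry can be consulted; it is illustrative only — the helper name load_dataset_paths is not part of the repository, and it assumes the paths in dataset_root.json are resolved relative to the repository root.

import json
import os


def load_dataset_paths(root_json="metadata-master/processed/dataset_root.json", dataset="audiocaps"):
    # Read the registry shown above and return the per-split metadata paths
    # ("train" / "test" / "val", plus "class_label_indices").
    with open(root_json, "r") as f:
        root = json.load(f)
    return root["metadata"]["path"][dataset]


if __name__ == "__main__":
    paths = load_dataset_paths()
    for split in ("train", "test", "val"):
        print(f"{split}: {paths[split]} (exists: {os.path.exists(paths[split])})")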
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/__init__.py
--------------------------------------------------------------------------------
/src/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/src/bigvgan/__init__.py:
--------------------------------------------------------------------------------
from .model import BigVGAN

# from .models_v2 import Generator


class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self
--------------------------------------------------------------------------------
/src/bigvgan/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/bigvgan/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/src/bigvgan/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/bigvgan/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/src/bigvgan/__pycache__/model.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/bigvgan/__pycache__/model.cpython-39.pyc
--------------------------------------------------------------------------------
/src/bigvgan/__pycache__/models.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/bigvgan/__pycache__/models.cpython-38.pyc
--------------------------------------------------------------------------------
/src/bigvgan/__pycache__/models.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/bigvgan/__pycache__/models.cpython-39.pyc
--------------------------------------------------------------------------------
/src/bigvgan/__pycache__/models_v2.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/bigvgan/__pycache__/models_v2.cpython-38.pyc
--------------------------------------------------------------------------------
/src/bigvgan/__pycache__/models_v2.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/bigvgan/__pycache__/models_v2.cpython-39.pyc
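src/bigvgan/__init__.py above exposes the BigVGAN generator together with AttrDict, a dict whose keys are also readable as attributes. It is typically paired with the vocoder hyperparameters in src/bigvgan/config.json and the g_01000000 generator checkpoint listed next. The sketch below is a minimal illustration under the assumption that this BigVGAN follows the reference BigVGAN/HiFi-GAN recipe (the constructor takes the parsed config; the checkpoint stores weights under a "generator" key) — neither detail is confirmed by this listing.

import json

import torch

# Assumes the repository root is on PYTHONPATH so that `src` is importable.
from src.bigvgan import AttrDict, BigVGAN

# Wrap the hyperparameters from config.json so they can be read as attributes
# (h.num_mels, h.hop_size, h.sampling_rate, ...).
with open("src/bigvgan/config.json", "r") as f:
    h = AttrDict(json.load(f))

# Assumed constructor and checkpoint layout (reference BigVGAN recipe).
vocoder = BigVGAN(h)
state = torch.load("src/bigvgan/g_01000000", map_location="cpu")
vocoder.load_state_dict(state["generator"])
vocoder.eval()

# A (batch, num_mels, frames) mel spectrogram maps to a waveform at h.sampling_rate;
# with hop_size 160, each frame should correspond to roughly 160 samples.
mel = torch.randn(1, h.num_mels, 100)
with torch.no_grad():
    wav = vocoder(mel)
print(wav.shape)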
--------------------------------------------------------------------------------
/src/bigvgan/config.json:
--------------------------------------------------------------------------------
{
    "resblock": "1",
    "num_gpus": 0,
    "batch_size": 64,
    "learning_rate": 0.0001,
    "adam_b1": 0.8,
    "adam_b2": 0.99,
    "lr_decay": 0.999,
    "seed": 1234,
    "waveloss": "None",
    "mel_low": "None",

    "upsample_rates": [5,4,2,2,2],
    "upsample_kernel_sizes": [16,16,8,4,4],
    "upsample_initial_channel": 512,
    "resblock_kernel_sizes": [3,7,11],
    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],

    "activation": "snakebeta",
    "snake_logscale": true,

    "resolutions": [[1024, 120, 600], [2048, 240, 1200], [512, 50, 240]],
    "mpd_reshapes": [2, 3, 5, 7, 11],
    "use_spectral_norm": false,
    "discriminator_channel_mult": 1,

    "segment_size": 8192,
    "num_mels": 64,
    "num_freq": 1025,
    "n_fft": 1024,
    "hop_size": 160,
    "win_size": 1024,

    "sampling_rate": 16000,

    "fmin": 0,
    "fmax": 8000,
    "fmax_for_loss": null,

    "num_workers": 16,

    "dist_config": {
        "dist_backend": "nccl",
        "dist_url": "tcp://localhost:54321",
        "world_size": 1
    }
}
--------------------------------------------------------------------------------
/src/bigvgan/g_01000000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/bigvgan/g_01000000
--------------------------------------------------------------------------------
/src/diffusers/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/src/diffusers/__pycache__/configuration_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/__pycache__/configuration_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/src/diffusers/__pycache__/image_processor.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/__pycache__/image_processor.cpython-38.pyc
--------------------------------------------------------------------------------
/src/diffusers/__pycache__/loaders.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/__pycache__/loaders.cpython-38.pyc
--------------------------------------------------------------------------------
/src/diffusers/__pycache__/optimization.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/__pycache__/optimization.cpython-38.pyc
--------------------------------------------------------------------------------
/src/diffusers/__pycache__/training_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/__pycache__/training_utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseDiffusersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /src/diffusers/commands/diffusers_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from argparse import ArgumentParser 17 | 18 | from .env import EnvironmentCommand 19 | 20 | 21 | def main(): 22 | parser = ArgumentParser("Diffusers CLI tool", usage="diffusers-cli []") 23 | commands_parser = parser.add_subparsers(help="diffusers-cli command helpers") 24 | 25 | # Register commands 26 | EnvironmentCommand.register_subcommand(commands_parser) 27 | 28 | # Let's go 29 | args = parser.parse_args() 30 | 31 | if not hasattr(args, "func"): 32 | parser.print_help() 33 | exit(1) 34 | 35 | # Run 36 | service = args.func(args) 37 | service.run() 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /src/diffusers/commands/env.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import platform 16 | from argparse import ArgumentParser 17 | 18 | import huggingface_hub 19 | 20 | from .. import __version__ as version 21 | from ..utils import is_accelerate_available, is_torch_available, is_transformers_available, is_xformers_available 22 | from . import BaseDiffusersCLICommand 23 | 24 | 25 | def info_command_factory(_): 26 | return EnvironmentCommand() 27 | 28 | 29 | class EnvironmentCommand(BaseDiffusersCLICommand): 30 | @staticmethod 31 | def register_subcommand(parser: ArgumentParser): 32 | download_parser = parser.add_parser("env") 33 | download_parser.set_defaults(func=info_command_factory) 34 | 35 | def run(self): 36 | hub_version = huggingface_hub.__version__ 37 | 38 | pt_version = "not installed" 39 | pt_cuda_available = "NA" 40 | if is_torch_available(): 41 | import torch 42 | 43 | pt_version = torch.__version__ 44 | pt_cuda_available = torch.cuda.is_available() 45 | 46 | transformers_version = "not installed" 47 | if is_transformers_available(): 48 | import transformers 49 | 50 | transformers_version = transformers.__version__ 51 | 52 | accelerate_version = "not installed" 53 | if is_accelerate_available(): 54 | import accelerate 55 | 56 | accelerate_version = accelerate.__version__ 57 | 58 | xformers_version = "not installed" 59 | if is_xformers_available(): 60 | import xformers 61 | 62 | xformers_version = xformers.__version__ 63 | 64 | info = { 65 | "`diffusers` version": version, 66 | "Platform": platform.platform(), 67 | "Python version": platform.python_version(), 68 | "PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})", 69 | "Huggingface_hub version": hub_version, 70 | "Transformers version": transformers_version, 71 | "Accelerate version": accelerate_version, 72 | "xFormers version": xformers_version, 73 | "Using GPU in script?": "", 74 | "Using distributed or parallel set-up in script?": "", 75 | } 76 | 77 | print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n") 78 | print(self.format_dict(info)) 79 | 80 | return info 81 | 82 | @staticmethod 83 | def format_dict(d): 84 | return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n" 85 | -------------------------------------------------------------------------------- /src/diffusers/dependency_versions_check.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import sys 15 | 16 | from .dependency_versions_table import deps 17 | from .utils.versions import require_version, require_version_core 18 | 19 | 20 | # define which module versions we always want to check at run time 21 | # (usually the ones defined in `install_requires` in setup.py) 22 | # 23 | # order specific notes: 24 | # - tqdm must be checked before tokenizers 25 | 26 | pkgs_to_check_at_runtime = "python tqdm regex requests packaging filelock numpy tokenizers".split() 27 | if sys.version_info < (3, 7): 28 | pkgs_to_check_at_runtime.append("dataclasses") 29 | if sys.version_info < (3, 8): 30 | pkgs_to_check_at_runtime.append("importlib_metadata") 31 | 32 | for pkg in pkgs_to_check_at_runtime: 33 | if pkg in deps: 34 | if pkg == "tokenizers": 35 | # must be loaded here, or else tqdm check may fail 36 | from .utils import is_tokenizers_available 37 | 38 | if not is_tokenizers_available(): 39 | continue # not required, check version only if installed 40 | 41 | require_version_core(deps[pkg]) 42 | else: 43 | raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py") 44 | 45 | 46 | def dep_version_check(pkg, hint=None): 47 | require_version(deps[pkg], hint) 48 | -------------------------------------------------------------------------------- /src/diffusers/dependency_versions_table.py: -------------------------------------------------------------------------------- 1 | # THIS FILE HAS BEEN AUTOGENERATED. To update: 2 | # 1. modify the `_deps` dict in setup.py 3 | # 2. run `make deps_table_update`` 4 | deps = { 5 | "Pillow": "Pillow", 6 | "accelerate": "accelerate>=0.11.0", 7 | "compel": "compel==0.1.8", 8 | "black": "black~=23.1", 9 | "datasets": "datasets", 10 | "filelock": "filelock", 11 | "flax": "flax>=0.4.1", 12 | "hf-doc-builder": "hf-doc-builder>=0.3.0", 13 | "huggingface-hub": "huggingface-hub>=0.13.2", 14 | "requests-mock": "requests-mock==1.10.0", 15 | "importlib_metadata": "importlib_metadata", 16 | "isort": "isort>=5.5.4", 17 | "jax": "jax>=0.2.8,!=0.3.2", 18 | "jaxlib": "jaxlib>=0.1.65", 19 | "Jinja2": "Jinja2", 20 | "k-diffusion": "k-diffusion>=0.0.12", 21 | "librosa": "librosa", 22 | "numpy": "numpy", 23 | "omegaconf": "omegaconf", 24 | "parameterized": "parameterized", 25 | "protobuf": "protobuf>=3.20.3,<4", 26 | "pytest": "pytest", 27 | "pytest-timeout": "pytest-timeout", 28 | "pytest-xdist": "pytest-xdist", 29 | "ruff": "ruff>=0.0.241", 30 | "safetensors": "safetensors", 31 | "sentencepiece": "sentencepiece>=0.1.91,!=0.1.92", 32 | "scipy": "scipy", 33 | "regex": "regex!=2019.12.17", 34 | "requests": "requests", 35 | "tensorboard": "tensorboard", 36 | "torch": "torch>=1.4", 37 | "torchvision": "torchvision", 38 | "transformers": "transformers>=4.25.1", 39 | "urllib3": "urllib3<=2.0.0", 40 | } 41 | -------------------------------------------------------------------------------- /src/diffusers/experimental/README.md: -------------------------------------------------------------------------------- 1 | # 🧨 Diffusers Experimental 2 | 3 | We are adding experimental code to support novel applications and usages of the Diffusers library. 4 | Currently, the following experiments are supported: 5 | * Reinforcement learning via an implementation of the [Diffuser](https://arxiv.org/abs/2205.09991) model. 
-------------------------------------------------------------------------------- /src/diffusers/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | from .rl import ValueGuidedRLPipeline 2 | -------------------------------------------------------------------------------- /src/diffusers/experimental/rl/__init__.py: -------------------------------------------------------------------------------- 1 | from .value_guided_sampling import ValueGuidedRLPipeline 2 | -------------------------------------------------------------------------------- /src/diffusers/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | For more detail on the models, please refer to the [docs](https://huggingface.co/docs/diffusers/api/models). -------------------------------------------------------------------------------- /src/diffusers/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ..utils import is_flax_available, is_torch_available 16 | 17 | 18 | if is_torch_available(): 19 | from .autoencoder_kl import AutoencoderKL 20 | from .controlnet import ControlNetModel 21 | from .dual_transformer_2d import DualTransformer2DModel 22 | from .modeling_utils import ModelMixin 23 | from .prior_transformer import PriorTransformer 24 | from .t5_film_transformer import T5FilmDecoder 25 | from .transformer_2d import Transformer2DModel 26 | from .unet_1d import UNet1DModel 27 | from .unet_2d import UNet2DModel 28 | from .unet_2d_condition import UNet2DConditionModel 29 | from .unet_3d_condition import UNet3DConditionModel 30 | from .vq_model import VQModel 31 | 32 | if is_flax_available(): 33 | from .controlnet_flax import FlaxControlNetModel 34 | from .unet_2d_condition_flax import FlaxUNet2DConditionModel 35 | from .vae_flax import FlaxAutoencoderKL 36 | -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/attention.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/attention.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/attention_processor.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/attention_processor.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/autoencoder_kl.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/autoencoder_kl.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/controlnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/controlnet.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/dual_transformer_2d.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/dual_transformer_2d.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/embeddings.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/embeddings.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/modeling_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/modeling_utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/prior_transformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/prior_transformer.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/resnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/resnet.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/t5_film_transformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/t5_film_transformer.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/transformer_2d.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/transformer_2d.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/transformer_temporal.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/transformer_temporal.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/unet_1d.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/unet_1d.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/unet_1d_blocks.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/unet_1d_blocks.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/unet_2d.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/unet_2d.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/unet_2d_blocks.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/unet_2d_blocks.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/unet_2d_condition.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/unet_2d_condition.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/unet_3d_blocks.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/unet_3d_blocks.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/unet_3d_condition.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/unet_3d_condition.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/vae.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/vae.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/models/__pycache__/vq_model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/models/__pycache__/vq_model.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipeline_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | 14 | # limitations under the License. 15 | 16 | # NOTE: This file is deprecated and will be removed in a future version. 17 | # It only exists so that temporarely `from diffusers.pipelines import DiffusionPipeline` works 18 | 19 | from .pipelines import DiffusionPipeline, ImagePipelineOutput # noqa: F401 20 | from .utils import deprecate 21 | 22 | 23 | deprecate( 24 | "pipelines_utils", 25 | "0.22.0", 26 | "Importing `DiffusionPipeline` or `ImagePipelineOutput` from diffusers.pipeline_utils is deprecated. Please import from diffusers.pipelines.pipeline_utils instead.", 27 | standard_warn=False, 28 | stacklevel=3, 29 | ) 30 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/__pycache__/pipeline_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/__pycache__/pipeline_utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/alt_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Optional, Union 3 | 4 | import numpy as np 5 | import PIL 6 | from PIL import Image 7 | 8 | from ...utils import BaseOutput, is_torch_available, is_transformers_available 9 | 10 | 11 | @dataclass 12 | # Copied from diffusers.pipelines.stable_diffusion.__init__.StableDiffusionPipelineOutput with Stable->Alt 13 | class AltDiffusionPipelineOutput(BaseOutput): 14 | """ 15 | Output class for Alt Diffusion pipelines. 
16 | 17 | Args: 18 | images (`List[PIL.Image.Image]` or `np.ndarray`) 19 | List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width, 20 | num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline. 21 | nsfw_content_detected (`List[bool]`) 22 | List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work" 23 | (nsfw) content, or `None` if safety checking could not be performed. 24 | """ 25 | 26 | images: Union[List[PIL.Image.Image], np.ndarray] 27 | nsfw_content_detected: Optional[List[bool]] 28 | 29 | 30 | if is_transformers_available() and is_torch_available(): 31 | from .modeling_roberta_series import RobertaSeriesModelWithTransformation 32 | from .pipeline_alt_diffusion import AltDiffusionPipeline 33 | from .pipeline_alt_diffusion_img2img import AltDiffusionImg2ImgPipeline 34 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/alt_diffusion/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/alt_diffusion/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/alt_diffusion/__pycache__/modeling_roberta_series.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/alt_diffusion/__pycache__/modeling_roberta_series.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/alt_diffusion/__pycache__/pipeline_alt_diffusion.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/alt_diffusion/__pycache__/pipeline_alt_diffusion.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/alt_diffusion/__pycache__/pipeline_alt_diffusion_img2img.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/alt_diffusion/__pycache__/pipeline_alt_diffusion_img2img.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/audio_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .mel import Mel 2 | from .pipeline_audio_diffusion import AudioDiffusionPipeline 3 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/audio_diffusion/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/audio_diffusion/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/audio_diffusion/__pycache__/mel.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/audio_diffusion/__pycache__/mel.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/audio_diffusion/__pycache__/pipeline_audio_diffusion.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/audio_diffusion/__pycache__/pipeline_audio_diffusion.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/audioldm/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import ( 14 | AudioLDMPipeline, 15 | ) 16 | else: 17 | from .pipeline_audioldm import AudioLDMPipeline 18 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/audioldm/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/audioldm/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/audioldm/__pycache__/pipeline_audioldm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/audioldm/__pycache__/pipeline_audioldm.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/controlnet/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_flax_available, 4 | is_torch_available, 5 | is_transformers_available, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available()): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import * # noqa F403 14 | else: 15 | from .multicontrolnet import MultiControlNetModel 16 | from .pipeline_controlnet import StableDiffusionControlNetPipeline 17 | from .pipeline_controlnet_img2img import StableDiffusionControlNetImg2ImgPipeline 18 | from .pipeline_controlnet_inpaint import StableDiffusionControlNetInpaintPipeline 19 | 20 | 21 | if is_transformers_available() and is_flax_available(): 22 | from .pipeline_flax_controlnet import FlaxStableDiffusionControlNetPipeline 23 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/controlnet/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/controlnet/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/controlnet/__pycache__/multicontrolnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/controlnet/__pycache__/multicontrolnet.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_img2img.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_img2img.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_inpaint.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_inpaint.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/controlnet/multicontrolnet.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Tuple, Union 2 | 3 | import torch 4 | from torch import nn 5 | 6 | from ...models.controlnet import ControlNetModel, ControlNetOutput 7 | from ...models.modeling_utils import ModelMixin 8 | 9 | 10 | class MultiControlNetModel(ModelMixin): 11 | r""" 12 | Multiple `ControlNetModel` wrapper class for Multi-ControlNet 13 | 14 | This module is a wrapper for multiple instances of the `ControlNetModel`. The `forward()` API is designed to be 15 | compatible with `ControlNetModel`. 16 | 17 | Args: 18 | controlnets (`List[ControlNetModel]`): 19 | Provides additional conditioning to the unet during the denoising process. You must set multiple 20 | `ControlNetModel` as a list. 
21 | """ 22 | 23 | def __init__(self, controlnets: Union[List[ControlNetModel], Tuple[ControlNetModel]]): 24 | super().__init__() 25 | self.nets = nn.ModuleList(controlnets) 26 | 27 | def forward( 28 | self, 29 | sample: torch.FloatTensor, 30 | timestep: Union[torch.Tensor, float, int], 31 | encoder_hidden_states: torch.Tensor, 32 | controlnet_cond: List[torch.tensor], 33 | conditioning_scale: List[float], 34 | class_labels: Optional[torch.Tensor] = None, 35 | timestep_cond: Optional[torch.Tensor] = None, 36 | attention_mask: Optional[torch.Tensor] = None, 37 | cross_attention_kwargs: Optional[Dict[str, Any]] = None, 38 | guess_mode: bool = False, 39 | return_dict: bool = True, 40 | ) -> Union[ControlNetOutput, Tuple]: 41 | for i, (image, scale, controlnet) in enumerate(zip(controlnet_cond, conditioning_scale, self.nets)): 42 | down_samples, mid_sample = controlnet( 43 | sample, 44 | timestep, 45 | encoder_hidden_states, 46 | image, 47 | scale, 48 | class_labels, 49 | timestep_cond, 50 | attention_mask, 51 | cross_attention_kwargs, 52 | guess_mode, 53 | return_dict, 54 | ) 55 | 56 | # merge samples 57 | if i == 0: 58 | down_block_res_samples, mid_block_res_sample = down_samples, mid_sample 59 | else: 60 | down_block_res_samples = [ 61 | samples_prev + samples_curr 62 | for samples_prev, samples_curr in zip(down_block_res_samples, down_samples) 63 | ] 64 | mid_block_res_sample += mid_sample 65 | 66 | return down_block_res_samples, mid_block_res_sample 67 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/dance_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_dance_diffusion import DanceDiffusionPipeline 2 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/dance_diffusion/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/dance_diffusion/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/dance_diffusion/__pycache__/pipeline_dance_diffusion.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/dance_diffusion/__pycache__/pipeline_dance_diffusion.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_ddim import DDIMPipeline 2 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/ddim/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/ddim/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/ddim/__pycache__/pipeline_ddim.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/ddim/__pycache__/pipeline_ddim.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_ddpm import DDPMPipeline 2 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/ddpm/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/ddpm/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/ddpm/__pycache__/pipeline_ddpm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/ddpm/__pycache__/pipeline_ddpm.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Optional, Union 3 | 4 | import numpy as np 5 | import PIL 6 | 7 | from ...utils import BaseOutput, OptionalDependencyNotAvailable, is_torch_available, is_transformers_available 8 | from .timesteps import ( 9 | fast27_timesteps, 10 | smart27_timesteps, 11 | smart50_timesteps, 12 | smart100_timesteps, 13 | smart185_timesteps, 14 | super27_timesteps, 15 | super40_timesteps, 16 | super100_timesteps, 17 | ) 18 | 19 | 20 | @dataclass 21 | class IFPipelineOutput(BaseOutput): 22 | """ 23 | Args: 24 | Output class for Stable Diffusion pipelines. 25 | images (`List[PIL.Image.Image]` or `np.ndarray`) 26 | List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width, 27 | num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline. 28 | nsfw_detected (`List[bool]`) 29 | List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work" 30 | (nsfw) content or a watermark. `None` if safety checking could not be performed. 31 | watermark_detected (`List[bool]`) 32 | List of flags denoting whether the corresponding generated image likely has a watermark. `None` if safety 33 | checking could not be performed. 
34 | """ 35 | 36 | images: Union[List[PIL.Image.Image], np.ndarray] 37 | nsfw_detected: Optional[List[bool]] 38 | watermark_detected: Optional[List[bool]] 39 | 40 | 41 | try: 42 | if not (is_transformers_available() and is_torch_available()): 43 | raise OptionalDependencyNotAvailable() 44 | except OptionalDependencyNotAvailable: 45 | from ...utils.dummy_torch_and_transformers_objects import * # noqa F403 46 | else: 47 | from .pipeline_if import IFPipeline 48 | from .pipeline_if_img2img import IFImg2ImgPipeline 49 | from .pipeline_if_img2img_superresolution import IFImg2ImgSuperResolutionPipeline 50 | from .pipeline_if_inpainting import IFInpaintingPipeline 51 | from .pipeline_if_inpainting_superresolution import IFInpaintingSuperResolutionPipeline 52 | from .pipeline_if_superresolution import IFSuperResolutionPipeline 53 | from .safety_checker import IFSafetyChecker 54 | from .watermark import IFWatermarker 55 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/deepfloyd_if/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_img2img.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_img2img.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_img2img_superresolution.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_img2img_superresolution.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_inpainting.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_inpainting.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_inpainting_superresolution.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_inpainting_superresolution.cpython-38.pyc 
-------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_superresolution.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_superresolution.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/__pycache__/safety_checker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/deepfloyd_if/__pycache__/safety_checker.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/__pycache__/timesteps.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/deepfloyd_if/__pycache__/timesteps.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/__pycache__/watermark.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/deepfloyd_if/__pycache__/watermark.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/safety_checker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from transformers import CLIPConfig, CLIPVisionModelWithProjection, PreTrainedModel 5 | 6 | from ...utils import logging 7 | 8 | 9 | logger = logging.get_logger(__name__) 10 | 11 | 12 | class IFSafetyChecker(PreTrainedModel): 13 | config_class = CLIPConfig 14 | 15 | _no_split_modules = ["CLIPEncoderLayer"] 16 | 17 | def __init__(self, config: CLIPConfig): 18 | super().__init__(config) 19 | 20 | self.vision_model = CLIPVisionModelWithProjection(config.vision_config) 21 | 22 | self.p_head = nn.Linear(config.vision_config.projection_dim, 1) 23 | self.w_head = nn.Linear(config.vision_config.projection_dim, 1) 24 | 25 | @torch.no_grad() 26 | def forward(self, clip_input, images, p_threshold=0.5, w_threshold=0.5): 27 | image_embeds = self.vision_model(clip_input)[0] 28 | 29 | nsfw_detected = self.p_head(image_embeds) 30 | nsfw_detected = nsfw_detected.flatten() 31 | nsfw_detected = nsfw_detected > p_threshold 32 | nsfw_detected = nsfw_detected.tolist() 33 | 34 | if any(nsfw_detected): 35 | logger.warning( 36 | "Potential NSFW content was detected in one or more images. A black image will be returned instead." 37 | " Try again with a different prompt and/or seed." 
38 | ) 39 | 40 | for idx, nsfw_detected_ in enumerate(nsfw_detected): 41 | if nsfw_detected_: 42 | images[idx] = np.zeros(images[idx].shape) 43 | 44 | watermark_detected = self.w_head(image_embeds) 45 | watermark_detected = watermark_detected.flatten() 46 | watermark_detected = watermark_detected > w_threshold 47 | watermark_detected = watermark_detected.tolist() 48 | 49 | if any(watermark_detected): 50 | logger.warning( 51 | "Potential watermarked content was detected in one or more images. A black image will be returned instead." 52 | " Try again with a different prompt and/or seed." 53 | ) 54 | 55 | for idx, watermark_detected_ in enumerate(watermark_detected): 56 | if watermark_detected_: 57 | images[idx] = np.zeros(images[idx].shape) 58 | 59 | return images, nsfw_detected, watermark_detected 60 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/deepfloyd_if/watermark.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import PIL 4 | import torch 5 | from PIL import Image 6 | 7 | from ...configuration_utils import ConfigMixin 8 | from ...models.modeling_utils import ModelMixin 9 | from ...utils import PIL_INTERPOLATION 10 | 11 | 12 | class IFWatermarker(ModelMixin, ConfigMixin): 13 | def __init__(self): 14 | super().__init__() 15 | 16 | self.register_buffer("watermark_image", torch.zeros((62, 62, 4))) 17 | self.watermark_image_as_pil = None 18 | 19 | def apply_watermark(self, images: List[PIL.Image.Image], sample_size=None): 20 | # copied from https://github.com/deep-floyd/IF/blob/b77482e36ca2031cb94dbca1001fc1e6400bf4ab/deepfloyd_if/modules/base.py#L287 21 | 22 | h = images[0].height 23 | w = images[0].width 24 | 25 | sample_size = sample_size or h 26 | 27 | coef = min(h / sample_size, w / sample_size) 28 | img_h, img_w = (int(h / coef), int(w / coef)) if coef < 1 else (h, w) 29 | 30 | S1, S2 = 1024**2, img_w * img_h 31 | K = (S2 / S1) ** 0.5 32 | wm_size, wm_x, wm_y = int(K * 62), img_w - int(14 * K), img_h - int(14 * K) 33 | 34 | if self.watermark_image_as_pil is None: 35 | watermark_image = self.watermark_image.to(torch.uint8).cpu().numpy() 36 | watermark_image = Image.fromarray(watermark_image, mode="RGBA") 37 | self.watermark_image_as_pil = watermark_image 38 | 39 | wm_img = self.watermark_image_as_pil.resize( 40 | (wm_size, wm_size), PIL_INTERPOLATION["bicubic"], reducing_gap=None 41 | ) 42 | 43 | for pil_img in images: 44 | pil_img.paste(wm_img, box=(wm_x - wm_size, wm_y - wm_size, wm_x, wm_y), mask=wm_img.split()[-1]) 45 | 46 | return images 47 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/dit/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_dit import DiTPipeline 2 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/dit/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/dit/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/dit/__pycache__/pipeline_dit.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/dit/__pycache__/pipeline_dit.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/kandinsky/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available()): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import KandinskyPipeline, KandinskyPriorPipeline 14 | else: 15 | from .pipeline_kandinsky import KandinskyPipeline 16 | from .pipeline_kandinsky_img2img import KandinskyImg2ImgPipeline 17 | from .pipeline_kandinsky_inpaint import KandinskyInpaintPipeline 18 | from .pipeline_kandinsky_prior import KandinskyPriorPipeline 19 | from .text_encoder import MultilingualCLIP 20 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/kandinsky/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/kandinsky/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky_img2img.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky_img2img.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky_inpaint.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky_inpaint.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky_prior.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky_prior.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/kandinsky/__pycache__/text_encoder.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/kandinsky/__pycache__/text_encoder.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/kandinsky/text_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from transformers import PreTrainedModel, XLMRobertaConfig, XLMRobertaModel 3 | 4 | 5 | class MCLIPConfig(XLMRobertaConfig): 6 | model_type = "M-CLIP" 7 | 8 | def __init__(self, transformerDimSize=1024, imageDimSize=768, **kwargs): 9 | self.transformerDimensions = transformerDimSize 10 | self.numDims = imageDimSize 11 | super().__init__(**kwargs) 12 | 13 | 14 | class MultilingualCLIP(PreTrainedModel): 15 | config_class = MCLIPConfig 16 | 17 | def __init__(self, config, *args, **kwargs): 18 | super().__init__(config, *args, **kwargs) 19 | self.transformer = XLMRobertaModel(config) 20 | self.LinearTransformation = torch.nn.Linear( 21 | in_features=config.transformerDimensions, out_features=config.numDims 22 | ) 23 | 24 | def forward(self, input_ids, attention_mask): 25 | embs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)[0] 26 | embs2 = (embs * attention_mask.unsqueeze(2)).sum(dim=1) / attention_mask.sum(dim=1)[:, None] 27 | return self.LinearTransformation(embs2), embs 28 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import is_transformers_available 2 | from .pipeline_latent_diffusion_superresolution import LDMSuperResolutionPipeline 3 | 4 | 5 | if is_transformers_available(): 6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline 7 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/latent_diffusion/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion/__pycache__/pipeline_latent_diffusion.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/latent_diffusion/__pycache__/pipeline_latent_diffusion.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion/__pycache__/pipeline_latent_diffusion_superresolution.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/latent_diffusion/__pycache__/pipeline_latent_diffusion_superresolution.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion_uncond/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_latent_diffusion_uncond import LDMPipeline 2 | 
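The `MultilingualCLIP` encoder in `kandinsky/text_encoder.py` above mean-pools the XLM-R token states with the attention mask and projects the pooled vector to the image-embedding width. A minimal forward-pass sketch with a tiny, purely illustrative configuration (not the released Kandinsky checkpoint settings), again assuming the vendored `src/diffusers` package is importable as `diffusers`:

```python
import torch
from diffusers.pipelines.kandinsky.text_encoder import MCLIPConfig, MultilingualCLIP

# Illustrative toy configuration; hidden_size must match transformerDimSize so the
# final linear projection lines up with the transformer output width.
config = MCLIPConfig(
    transformerDimSize=32,
    imageDimSize=16,
    vocab_size=1000,
    hidden_size=32,
    num_hidden_layers=2,
    num_attention_heads=2,
    intermediate_size=64,
    max_position_embeddings=64,
)
model = MultilingualCLIP(config).eval()

input_ids = torch.randint(0, config.vocab_size, (2, 10))
attention_mask = torch.ones(2, 10, dtype=torch.long)
with torch.no_grad():
    pooled, token_states = model(input_ids, attention_mask)

print(pooled.shape)        # torch.Size([2, 16]) - mask-weighted mean, then linear projection
print(token_states.shape)  # torch.Size([2, 10, 32]) - raw XLM-R token states
```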
-------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion_uncond/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/latent_diffusion_uncond/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion_uncond/__pycache__/pipeline_latent_diffusion_uncond.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/latent_diffusion_uncond/__pycache__/pipeline_latent_diffusion_uncond.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/paint_by_example/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Optional, Union 3 | 4 | import numpy as np 5 | import PIL 6 | from PIL import Image 7 | 8 | from ...utils import is_torch_available, is_transformers_available 9 | 10 | 11 | if is_transformers_available() and is_torch_available(): 12 | from .image_encoder import PaintByExampleImageEncoder 13 | from .pipeline_paint_by_example import PaintByExamplePipeline 14 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/paint_by_example/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/paint_by_example/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/paint_by_example/__pycache__/image_encoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/paint_by_example/__pycache__/image_encoder.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/paint_by_example/__pycache__/pipeline_paint_by_example.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/paint_by_example/__pycache__/pipeline_paint_by_example.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/paint_by_example/image_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import torch 15 | from torch import nn 16 | from transformers import CLIPPreTrainedModel, CLIPVisionModel 17 | 18 | from ...models.attention import BasicTransformerBlock 19 | from ...utils import logging 20 | 21 | 22 | logger = logging.get_logger(__name__) # pylint: disable=invalid-name 23 | 24 | 25 | class PaintByExampleImageEncoder(CLIPPreTrainedModel): 26 | def __init__(self, config, proj_size=768): 27 | super().__init__(config) 28 | self.proj_size = proj_size 29 | 30 | self.model = CLIPVisionModel(config) 31 | self.mapper = PaintByExampleMapper(config) 32 | self.final_layer_norm = nn.LayerNorm(config.hidden_size) 33 | self.proj_out = nn.Linear(config.hidden_size, self.proj_size) 34 | 35 | # uncondition for scaling 36 | self.uncond_vector = nn.Parameter(torch.randn((1, 1, self.proj_size))) 37 | 38 | def forward(self, pixel_values, return_uncond_vector=False): 39 | clip_output = self.model(pixel_values=pixel_values) 40 | latent_states = clip_output.pooler_output 41 | latent_states = self.mapper(latent_states[:, None]) 42 | latent_states = self.final_layer_norm(latent_states) 43 | latent_states = self.proj_out(latent_states) 44 | if return_uncond_vector: 45 | return latent_states, self.uncond_vector 46 | 47 | return latent_states 48 | 49 | 50 | class PaintByExampleMapper(nn.Module): 51 | def __init__(self, config): 52 | super().__init__() 53 | num_layers = (config.num_hidden_layers + 1) // 5 54 | hid_size = config.hidden_size 55 | num_heads = 1 56 | self.blocks = nn.ModuleList( 57 | [ 58 | BasicTransformerBlock(hid_size, num_heads, hid_size, activation_fn="gelu", attention_bias=True) 59 | for _ in range(num_layers) 60 | ] 61 | ) 62 | 63 | def forward(self, hidden_states): 64 | for block in self.blocks: 65 | hidden_states = block(hidden_states) 66 | 67 | return hidden_states 68 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_pndm import PNDMPipeline 2 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/pndm/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/pndm/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/pndm/__pycache__/pipeline_pndm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/pndm/__pycache__/pipeline_pndm.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/repaint/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_repaint import 
RePaintPipeline 2 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/repaint/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/repaint/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/repaint/__pycache__/pipeline_repaint.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/repaint/__pycache__/pipeline_repaint.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_score_sde_ve import ScoreSdeVePipeline 2 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/score_sde_ve/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/score_sde_ve/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/score_sde_ve/__pycache__/pipeline_score_sde_ve.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/score_sde_ve/__pycache__/pipeline_score_sde_ve.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/semantic_stable_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from enum import Enum 3 | from typing import List, Optional, Union 4 | 5 | import numpy as np 6 | import PIL 7 | from PIL import Image 8 | 9 | from ...utils import BaseOutput, is_torch_available, is_transformers_available 10 | 11 | 12 | @dataclass 13 | class SemanticStableDiffusionPipelineOutput(BaseOutput): 14 | """ 15 | Output class for Stable Diffusion pipelines. 16 | 17 | Args: 18 | images (`List[PIL.Image.Image]` or `np.ndarray`) 19 | List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width, 20 | num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline. 21 | nsfw_content_detected (`List[bool]`) 22 | List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work" 23 | (nsfw) content, or `None` if safety checking could not be performed. 
24 | """ 25 | 26 | images: Union[List[PIL.Image.Image], np.ndarray] 27 | nsfw_content_detected: Optional[List[bool]] 28 | 29 | 30 | if is_transformers_available() and is_torch_available(): 31 | from .pipeline_semantic_stable_diffusion import SemanticStableDiffusionPipeline 32 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/semantic_stable_diffusion/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/semantic_stable_diffusion/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/semantic_stable_diffusion/__pycache__/pipeline_semantic_stable_diffusion.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/semantic_stable_diffusion/__pycache__/pipeline_semantic_stable_diffusion.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/spectrogram_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from ...utils import is_note_seq_available, is_transformers_available, is_torch_available 3 | from ...utils import OptionalDependencyNotAvailable 4 | 5 | 6 | try: 7 | if not (is_transformers_available() and is_torch_available()): 8 | raise OptionalDependencyNotAvailable() 9 | except OptionalDependencyNotAvailable: 10 | from ...utils.dummy_torch_and_transformers_objects import * # noqa F403 11 | else: 12 | from .notes_encoder import SpectrogramNotesEncoder 13 | from .continous_encoder import SpectrogramContEncoder 14 | from .pipeline_spectrogram_diffusion import ( 15 | SpectrogramContEncoder, 16 | SpectrogramDiffusionPipeline, 17 | T5FilmDecoder, 18 | ) 19 | 20 | try: 21 | if not (is_transformers_available() and is_torch_available() and is_note_seq_available()): 22 | raise OptionalDependencyNotAvailable() 23 | except OptionalDependencyNotAvailable: 24 | from ...utils.dummy_transformers_and_torch_and_note_seq_objects import * # noqa F403 25 | else: 26 | from .midi_utils import MidiProcessor 27 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/spectrogram_diffusion/notes_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The Music Spectrogram Diffusion Authors. 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import torch 17 | import torch.nn as nn 18 | from transformers.modeling_utils import ModuleUtilsMixin 19 | from transformers.models.t5.modeling_t5 import T5Block, T5Config, T5LayerNorm 20 | 21 | from ...configuration_utils import ConfigMixin, register_to_config 22 | from ...models import ModelMixin 23 | 24 | 25 | class SpectrogramNotesEncoder(ModelMixin, ConfigMixin, ModuleUtilsMixin): 26 | @register_to_config 27 | def __init__( 28 | self, 29 | max_length: int, 30 | vocab_size: int, 31 | d_model: int, 32 | dropout_rate: float, 33 | num_layers: int, 34 | num_heads: int, 35 | d_kv: int, 36 | d_ff: int, 37 | feed_forward_proj: str, 38 | is_decoder: bool = False, 39 | ): 40 | super().__init__() 41 | 42 | self.token_embedder = nn.Embedding(vocab_size, d_model) 43 | 44 | self.position_encoding = nn.Embedding(max_length, d_model) 45 | self.position_encoding.weight.requires_grad = False 46 | 47 | self.dropout_pre = nn.Dropout(p=dropout_rate) 48 | 49 | t5config = T5Config( 50 | vocab_size=vocab_size, 51 | d_model=d_model, 52 | num_heads=num_heads, 53 | d_kv=d_kv, 54 | d_ff=d_ff, 55 | dropout_rate=dropout_rate, 56 | feed_forward_proj=feed_forward_proj, 57 | is_decoder=is_decoder, 58 | is_encoder_decoder=False, 59 | ) 60 | 61 | self.encoders = nn.ModuleList() 62 | for lyr_num in range(num_layers): 63 | lyr = T5Block(t5config) 64 | self.encoders.append(lyr) 65 | 66 | self.layer_norm = T5LayerNorm(d_model) 67 | self.dropout_post = nn.Dropout(p=dropout_rate) 68 | 69 | def forward(self, encoder_input_tokens, encoder_inputs_mask): 70 | x = self.token_embedder(encoder_input_tokens) 71 | 72 | seq_length = encoder_input_tokens.shape[1] 73 | inputs_positions = torch.arange(seq_length, device=encoder_input_tokens.device) 74 | x += self.position_encoding(inputs_positions) 75 | 76 | x = self.dropout_pre(x) 77 | 78 | # inverted the attention mask 79 | input_shape = encoder_input_tokens.size() 80 | extended_attention_mask = self.get_extended_attention_mask(encoder_inputs_mask, input_shape) 81 | 82 | for lyr in self.encoders: 83 | x = lyr(x, extended_attention_mask)[0] 84 | x = self.layer_norm(x) 85 | 86 | return self.dropout_post(x), encoder_inputs_mask 87 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_cycle_diffusion.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_cycle_diffusion.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion.cpython-38.pyc 
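`SpectrogramNotesEncoder` above embeds note-event tokens, adds a frozen positional embedding, and passes the result through a stack of T5 encoder blocks before a final layer norm. A minimal forward-pass sketch with small, hypothetical hyperparameters (the real Spectrogram Diffusion checkpoint uses larger values), assuming the vendored `src/diffusers` package is importable as `diffusers`:

```python
import torch
from diffusers.pipelines.spectrogram_diffusion.notes_encoder import SpectrogramNotesEncoder

# Small hypothetical hyperparameters, chosen only to keep the example fast.
encoder = SpectrogramNotesEncoder(
    max_length=32,
    vocab_size=128,
    d_model=64,
    dropout_rate=0.1,
    num_layers=2,
    num_heads=4,
    d_kv=16,
    d_ff=128,
    feed_forward_proj="gated-gelu",
).eval()

tokens = torch.randint(0, 128, (1, 32))      # note-event token ids
mask = torch.ones(1, 32, dtype=torch.long)   # all positions are valid
with torch.no_grad():
    hidden, out_mask = encoder(tokens, mask)

print(hidden.shape)  # torch.Size([1, 32, 64]) - per-token encodings fed to the decoder
```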
-------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_attend_and_excite.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_attend_and_excite.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_depth2img.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_depth2img.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_diffedit.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_diffedit.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_image_variation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_image_variation.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_img2img.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_img2img.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_inpaint.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_inpaint.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_inpaint_legacy.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_inpaint_legacy.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_instruct_pix2pix.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_instruct_pix2pix.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_latent_upscale.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_latent_upscale.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_model_editing.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_model_editing.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_panorama.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_panorama.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_pix2pix_zero.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_pix2pix_zero.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_sag.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_sag.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_upscale.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_upscale.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_unclip.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_unclip.cpython-38.pyc 
-------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_unclip_img2img.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_unclip_img2img.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/safety_checker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/safety_checker.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/__pycache__/stable_unclip_image_normalizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion/__pycache__/stable_unclip_image_normalizer.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_controlnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # NOTE: This file is deprecated and will be removed in a future version. 16 | # It only exists so that temporarely `from diffusers.pipelines import DiffusionPipeline` works 17 | 18 | from ...utils import deprecate 19 | from ..controlnet.pipeline_flax_controlnet import FlaxStableDiffusionControlNetPipeline # noqa: F401 20 | 21 | 22 | deprecate( 23 | "stable diffusion controlnet", 24 | "0.22.0", 25 | "Importing `FlaxStableDiffusionControlNetPipeline` from diffusers.pipelines.stable_diffusion.flax_pipeline_stable_diffusion_controlnet is deprecated. Please import `from diffusers import FlaxStableDiffusionControlNetPipeline` instead.", 26 | standard_warn=False, 27 | stacklevel=3, 28 | ) 29 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_controlnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # NOTE: This file is deprecated and will be removed in a future version. 16 | # It only exists so that temporarely `from diffusers.pipelines import DiffusionPipeline` works 17 | from ...utils import deprecate 18 | from ..controlnet.multicontrolnet import MultiControlNetModel # noqa: F401 19 | from ..controlnet.pipeline_controlnet import StableDiffusionControlNetPipeline # noqa: F401 20 | 21 | 22 | deprecate( 23 | "stable diffusion controlnet", 24 | "0.22.0", 25 | "Importing `StableDiffusionControlNetPipeline` or `MultiControlNetModel` from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet is deprecated. Please import `from diffusers import StableDiffusionControlNetPipeline` instead.", 26 | standard_warn=False, 27 | stacklevel=3, 28 | ) 29 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional, Union 16 | 17 | import torch 18 | from torch import nn 19 | 20 | from ...configuration_utils import ConfigMixin, register_to_config 21 | from ...models.modeling_utils import ModelMixin 22 | 23 | 24 | class StableUnCLIPImageNormalizer(ModelMixin, ConfigMixin): 25 | """ 26 | This class is used to hold the mean and standard deviation of the CLIP embedder used in stable unCLIP. 27 | 28 | It is used to normalize the image embeddings before the noise is applied and un-normalize the noised image 29 | embeddings. 
30 | """ 31 | 32 | @register_to_config 33 | def __init__( 34 | self, 35 | embedding_dim: int = 768, 36 | ): 37 | super().__init__() 38 | 39 | self.mean = nn.Parameter(torch.zeros(1, embedding_dim)) 40 | self.std = nn.Parameter(torch.ones(1, embedding_dim)) 41 | 42 | def to( 43 | self, 44 | torch_device: Optional[Union[str, torch.device]] = None, 45 | torch_dtype: Optional[torch.dtype] = None, 46 | ): 47 | self.mean = nn.Parameter(self.mean.to(torch_device).to(torch_dtype)) 48 | self.std = nn.Parameter(self.std.to(torch_device).to(torch_dtype)) 49 | return self 50 | 51 | def scale(self, embeds): 52 | embeds = (embeds - self.mean) * 1.0 / self.std 53 | return embeds 54 | 55 | def unscale(self, embeds): 56 | embeds = (embeds * self.std) + self.mean 57 | return embeds 58 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion_safe/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from enum import Enum 3 | from typing import List, Optional, Union 4 | 5 | import numpy as np 6 | import PIL 7 | from PIL import Image 8 | 9 | from ...utils import BaseOutput, is_torch_available, is_transformers_available 10 | 11 | 12 | @dataclass 13 | class SafetyConfig(object): 14 | WEAK = { 15 | "sld_warmup_steps": 15, 16 | "sld_guidance_scale": 20, 17 | "sld_threshold": 0.0, 18 | "sld_momentum_scale": 0.0, 19 | "sld_mom_beta": 0.0, 20 | } 21 | MEDIUM = { 22 | "sld_warmup_steps": 10, 23 | "sld_guidance_scale": 1000, 24 | "sld_threshold": 0.01, 25 | "sld_momentum_scale": 0.3, 26 | "sld_mom_beta": 0.4, 27 | } 28 | STRONG = { 29 | "sld_warmup_steps": 7, 30 | "sld_guidance_scale": 2000, 31 | "sld_threshold": 0.025, 32 | "sld_momentum_scale": 0.5, 33 | "sld_mom_beta": 0.7, 34 | } 35 | MAX = { 36 | "sld_warmup_steps": 0, 37 | "sld_guidance_scale": 5000, 38 | "sld_threshold": 1.0, 39 | "sld_momentum_scale": 0.5, 40 | "sld_mom_beta": 0.7, 41 | } 42 | 43 | 44 | @dataclass 45 | class StableDiffusionSafePipelineOutput(BaseOutput): 46 | """ 47 | Output class for Safe Stable Diffusion pipelines. 48 | 49 | Args: 50 | images (`List[PIL.Image.Image]` or `np.ndarray`) 51 | List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width, 52 | num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline. 53 | nsfw_content_detected (`List[bool]`) 54 | List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work" 55 | (nsfw) content, or `None` if safety checking could not be performed. 56 | images (`List[PIL.Image.Image]` or `np.ndarray`) 57 | List of denoised PIL images that were flagged by the safety checker any may contain "not-safe-for-work" 58 | (nsfw) content, or `None` if no safety check was performed or no images were flagged. 
59 | applied_safety_concept (`str`) 60 | The safety concept that was applied for safety guidance, or `None` if safety guidance was disabled 61 | """ 62 | 63 | images: Union[List[PIL.Image.Image], np.ndarray] 64 | nsfw_content_detected: Optional[List[bool]] 65 | unsafe_images: Optional[Union[List[PIL.Image.Image], np.ndarray]] 66 | applied_safety_concept: Optional[str] 67 | 68 | 69 | if is_transformers_available() and is_torch_available(): 70 | from .pipeline_stable_diffusion_safe import StableDiffusionPipelineSafe 71 | from .safety_checker import SafeStableDiffusionSafetyChecker 72 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion_safe/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion_safe/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion_safe/__pycache__/pipeline_stable_diffusion_safe.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion_safe/__pycache__/pipeline_stable_diffusion_safe.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion_safe/__pycache__/safety_checker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stable_diffusion_safe/__pycache__/safety_checker.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stochastic_karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_stochastic_karras_ve import KarrasVePipeline 2 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/stochastic_karras_ve/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stochastic_karras_ve/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/stochastic_karras_ve/__pycache__/pipeline_stochastic_karras_ve.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/stochastic_karras_ve/__pycache__/pipeline_stochastic_karras_ve.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/text_to_video_synthesis/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Optional, Union 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from ...utils import BaseOutput, OptionalDependencyNotAvailable, is_torch_available, is_transformers_available 8 | 9 
| 10 | @dataclass 11 | class TextToVideoSDPipelineOutput(BaseOutput): 12 | """ 13 | Output class for text to video pipelines. 14 | 15 | Args: 16 | frames (`List[np.ndarray]` or `torch.FloatTensor`) 17 | List of denoised frames (essentially images) as NumPy arrays of shape `(height, width, num_channels)` or as 18 | a `torch` tensor. NumPy array present the denoised images of the diffusion pipeline. The length of the list 19 | denotes the video length i.e., the number of frames. 20 | """ 21 | 22 | frames: Union[List[np.ndarray], torch.FloatTensor] 23 | 24 | 25 | try: 26 | if not (is_transformers_available() and is_torch_available()): 27 | raise OptionalDependencyNotAvailable() 28 | except OptionalDependencyNotAvailable: 29 | from ...utils.dummy_torch_and_transformers_objects import * # noqa F403 30 | else: 31 | from .pipeline_text_to_video_synth import TextToVideoSDPipeline # noqa: F401 32 | from .pipeline_text_to_video_zero import TextToVideoZeroPipeline 33 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/text_to_video_synthesis/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/text_to_video_synthesis/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/text_to_video_synthesis/__pycache__/pipeline_text_to_video_synth.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/text_to_video_synthesis/__pycache__/pipeline_text_to_video_synth.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/text_to_video_synthesis/__pycache__/pipeline_text_to_video_zero.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/text_to_video_synthesis/__pycache__/pipeline_text_to_video_zero.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/unclip/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import UnCLIPImageVariationPipeline, UnCLIPPipeline 14 | else: 15 | from .pipeline_unclip import UnCLIPPipeline 16 | from .pipeline_unclip_image_variation import UnCLIPImageVariationPipeline 17 | from .text_proj import UnCLIPTextProjModel 18 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/unclip/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/unclip/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/unclip/__pycache__/pipeline_unclip.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/unclip/__pycache__/pipeline_unclip.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/unclip/__pycache__/pipeline_unclip_image_variation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/unclip/__pycache__/pipeline_unclip_image_variation.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/unclip/__pycache__/text_proj.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/unclip/__pycache__/text_proj.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/unidiffuser/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available()): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import ( 14 | ImageTextPipelineOutput, 15 | UniDiffuserPipeline, 16 | ) 17 | else: 18 | from .modeling_text_decoder import UniDiffuserTextDecoder 19 | from .modeling_uvit import UniDiffuserModel, UTransformer2DModel 20 | from .pipeline_unidiffuser import ImageTextPipelineOutput, UniDiffuserPipeline 21 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/unidiffuser/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/unidiffuser/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/unidiffuser/__pycache__/modeling_text_decoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/unidiffuser/__pycache__/modeling_text_decoder.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/unidiffuser/__pycache__/modeling_uvit.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/unidiffuser/__pycache__/modeling_uvit.cpython-38.pyc 
-------------------------------------------------------------------------------- /src/diffusers/pipelines/unidiffuser/__pycache__/pipeline_unidiffuser.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/unidiffuser/__pycache__/pipeline_unidiffuser.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/versatile_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import ( 14 | VersatileDiffusionDualGuidedPipeline, 15 | VersatileDiffusionImageVariationPipeline, 16 | VersatileDiffusionPipeline, 17 | VersatileDiffusionTextToImagePipeline, 18 | ) 19 | else: 20 | from .modeling_text_unet import UNetFlatConditionModel 21 | from .pipeline_versatile_diffusion import VersatileDiffusionPipeline 22 | from .pipeline_versatile_diffusion_dual_guided import VersatileDiffusionDualGuidedPipeline 23 | from .pipeline_versatile_diffusion_image_variation import VersatileDiffusionImageVariationPipeline 24 | from .pipeline_versatile_diffusion_text_to_image import VersatileDiffusionTextToImagePipeline 25 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/versatile_diffusion/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/versatile_diffusion/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/versatile_diffusion/__pycache__/modeling_text_unet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/versatile_diffusion/__pycache__/modeling_text_unet.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/versatile_diffusion/__pycache__/pipeline_versatile_diffusion.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/versatile_diffusion/__pycache__/pipeline_versatile_diffusion.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/versatile_diffusion/__pycache__/pipeline_versatile_diffusion_dual_guided.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/versatile_diffusion/__pycache__/pipeline_versatile_diffusion_dual_guided.cpython-38.pyc 
-------------------------------------------------------------------------------- /src/diffusers/pipelines/versatile_diffusion/__pycache__/pipeline_versatile_diffusion_image_variation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/versatile_diffusion/__pycache__/pipeline_versatile_diffusion_image_variation.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/versatile_diffusion/__pycache__/pipeline_versatile_diffusion_text_to_image.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/versatile_diffusion/__pycache__/pipeline_versatile_diffusion_text_to_image.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/vq_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import is_torch_available, is_transformers_available 2 | 3 | 4 | if is_transformers_available() and is_torch_available(): 5 | from .pipeline_vq_diffusion import LearnedClassifierFreeSamplingEmbeddings, VQDiffusionPipeline 6 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/vq_diffusion/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/vq_diffusion/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/pipelines/vq_diffusion/__pycache__/pipeline_vq_diffusion.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/pipelines/vq_diffusion/__pycache__/pipeline_vq_diffusion.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/schedulers/README.md: -------------------------------------------------------------------------------- 1 | # Schedulers 2 | 3 | For more information on the schedulers, please refer to the [docs](https://huggingface.co/docs/diffusers/api/schedulers/overview). 
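The schedulers README above only points at the external docs, so here is a minimal orientation sketch of how a scheduler from this directory is typically swapped into a pipeline. It assumes the standard `diffusers` API (`DiffusionPipeline.from_pretrained` plus the scheduler's `from_config` classmethod) and uses a placeholder checkpoint id; it is an illustration, not code from this repository.

```python
# Minimal sketch: replace a pipeline's default scheduler with another one from
# src/diffusers/schedulers. The checkpoint id below is a placeholder.
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler

pipe = DiffusionPipeline.from_pretrained("some-org/some-diffusion-checkpoint")

# Build the new scheduler from the old one's config so that shared settings
# (beta schedule, number of train timesteps, ...) carry over unchanged.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
```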
-------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/constants.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/constants.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/deprecation_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/deprecation_utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/doc_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/doc_utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/dummy_flax_and_transformers_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/dummy_flax_and_transformers_objects.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/dummy_flax_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/dummy_flax_objects.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/dummy_note_seq_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/dummy_note_seq_objects.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/dummy_onnx_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/dummy_onnx_objects.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/dummy_torch_and_torchsde_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/dummy_torch_and_torchsde_objects.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_k_diffusion_objects.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_k_diffusion_objects.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_onnx_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_onnx_objects.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/dummy_transformers_and_torch_and_note_seq_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/dummy_transformers_and_torch_and_note_seq_objects.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/dynamic_modules_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/dynamic_modules_utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/hub_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/hub_utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/import_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/import_utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/logging.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/logging.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/outputs.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/outputs.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/pil_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/pil_utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/testing_utils.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/testing_utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/__pycache__/torch_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/diffusers/utils/__pycache__/torch_utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/diffusers/utils/accelerate_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Accelerate utilities: Utilities related to accelerate 16 | """ 17 | 18 | from packaging import version 19 | 20 | from .import_utils import is_accelerate_available 21 | 22 | 23 | if is_accelerate_available(): 24 | import accelerate 25 | 26 | 27 | def apply_forward_hook(method): 28 | """ 29 | Decorator that applies a registered CpuOffload hook to an arbitrary function rather than `forward`. This is useful 30 | for cases where a PyTorch module provides functions other than `forward` that should trigger a move to the 31 | appropriate acceleration device. This is the case for `encode` and `decode` in [`AutoencoderKL`]. 32 | 33 | This decorator looks inside the internal `_hf_hook` property to find a registered offload hook. 34 | 35 | :param method: The method to decorate. This method should be a method of a PyTorch module. 36 | """ 37 | if not is_accelerate_available(): 38 | return method 39 | accelerate_version = version.parse(accelerate.__version__).base_version 40 | if version.parse(accelerate_version) < version.parse("0.17.0"): 41 | return method 42 | 43 | def wrapper(self, *args, **kwargs): 44 | if hasattr(self, "_hf_hook") and hasattr(self._hf_hook, "pre_forward"): 45 | self._hf_hook.pre_forward(self) 46 | return method(self, *args, **kwargs) 47 | 48 | return wrapper 49 | -------------------------------------------------------------------------------- /src/diffusers/utils/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE, hf_cache_home 17 | 18 | 19 | default_cache_path = HUGGINGFACE_HUB_CACHE 20 | 21 | 22 | CONFIG_NAME = "config.json" 23 | WEIGHTS_NAME = "diffusion_pytorch_model.bin" 24 | FLAX_WEIGHTS_NAME = "diffusion_flax_model.msgpack" 25 | ONNX_WEIGHTS_NAME = "model.onnx" 26 | SAFETENSORS_WEIGHTS_NAME = "diffusion_pytorch_model.safetensors" 27 | ONNX_EXTERNAL_WEIGHTS_NAME = "weights.pb" 28 | HUGGINGFACE_CO_RESOLVE_ENDPOINT = "https://huggingface.co" 29 | DIFFUSERS_CACHE = default_cache_path 30 | DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules" 31 | HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules")) 32 | DEPRECATED_REVISION_ARGS = ["fp16", "non-ema"] 33 | TEXT_ENCODER_TARGET_MODULES = ["q_proj", "v_proj", "k_proj", "out_proj"] 34 | -------------------------------------------------------------------------------- /src/diffusers/utils/deprecation_utils.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import warnings 3 | from typing import Any, Dict, Optional, Union 4 | 5 | from packaging import version 6 | 7 | 8 | def deprecate(*args, take_from: Optional[Union[Dict, Any]] = None, standard_warn=True, stacklevel=2): 9 | from .. import __version__ 10 | 11 | deprecated_kwargs = take_from 12 | values = () 13 | if not isinstance(args[0], tuple): 14 | args = (args,) 15 | 16 | for attribute, version_name, message in args: 17 | if version.parse(version.parse(__version__).base_version) >= version.parse(version_name): 18 | raise ValueError( 19 | f"The deprecation tuple {(attribute, version_name, message)} should be removed since diffusers'" 20 | f" version {__version__} is >= {version_name}" 21 | ) 22 | 23 | warning = None 24 | if isinstance(deprecated_kwargs, dict) and attribute in deprecated_kwargs: 25 | values += (deprecated_kwargs.pop(attribute),) 26 | warning = f"The `{attribute}` argument is deprecated and will be removed in version {version_name}." 27 | elif hasattr(deprecated_kwargs, attribute): 28 | values += (getattr(deprecated_kwargs, attribute),) 29 | warning = f"The `{attribute}` attribute is deprecated and will be removed in version {version_name}." 30 | elif deprecated_kwargs is None: 31 | warning = f"`{attribute}` is deprecated and will be removed in version {version_name}." 32 | 33 | if warning is not None: 34 | warning = warning + " " if standard_warn else "" 35 | warnings.warn(warning + message, FutureWarning, stacklevel=stacklevel) 36 | 37 | if isinstance(deprecated_kwargs, dict) and len(deprecated_kwargs) > 0: 38 | call_frame = inspect.getouterframes(inspect.currentframe())[1] 39 | filename = call_frame.filename 40 | line_number = call_frame.lineno 41 | function = call_frame.function 42 | key, value = next(iter(deprecated_kwargs.items())) 43 | raise TypeError(f"{function} in {filename} line {line_number-1} got an unexpected keyword argument `{key}`") 44 | 45 | if len(values) == 0: 46 | return 47 | elif len(values) == 1: 48 | return values[0] 49 | return values 50 | -------------------------------------------------------------------------------- /src/diffusers/utils/doc_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Doc utilities: Utilities related to documentation 16 | """ 17 | import re 18 | 19 | 20 | def replace_example_docstring(example_docstring): 21 | def docstring_decorator(fn): 22 | func_doc = fn.__doc__ 23 | lines = func_doc.split("\n") 24 | i = 0 25 | while i < len(lines) and re.search(r"^\s*Examples?:\s*$", lines[i]) is None: 26 | i += 1 27 | if i < len(lines): 28 | lines[i] = example_docstring 29 | func_doc = "\n".join(lines) 30 | else: 31 | raise ValueError( 32 | f"The function {fn} should have an empty 'Examples:' in its docstring as placeholder, " 33 | f"current docstring is:\n{func_doc}" 34 | ) 35 | fn.__doc__ = func_doc 36 | return fn 37 | 38 | return docstring_decorator 39 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_flax_and_transformers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class FlaxStableDiffusionControlNetPipeline(metaclass=DummyObject): 6 | _backends = ["flax", "transformers"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["flax", "transformers"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["flax", "transformers"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["flax", "transformers"]) 18 | 19 | 20 | class FlaxStableDiffusionImg2ImgPipeline(metaclass=DummyObject): 21 | _backends = ["flax", "transformers"] 22 | 23 | def __init__(self, *args, **kwargs): 24 | requires_backends(self, ["flax", "transformers"]) 25 | 26 | @classmethod 27 | def from_config(cls, *args, **kwargs): 28 | requires_backends(cls, ["flax", "transformers"]) 29 | 30 | @classmethod 31 | def from_pretrained(cls, *args, **kwargs): 32 | requires_backends(cls, ["flax", "transformers"]) 33 | 34 | 35 | class FlaxStableDiffusionInpaintPipeline(metaclass=DummyObject): 36 | _backends = ["flax", "transformers"] 37 | 38 | def __init__(self, *args, **kwargs): 39 | requires_backends(self, ["flax", "transformers"]) 40 | 41 | @classmethod 42 | def from_config(cls, *args, **kwargs): 43 | requires_backends(cls, ["flax", "transformers"]) 44 | 45 | @classmethod 46 | def from_pretrained(cls, *args, **kwargs): 47 | requires_backends(cls, ["flax", "transformers"]) 48 | 49 | 50 | class FlaxStableDiffusionPipeline(metaclass=DummyObject): 51 | _backends = ["flax", "transformers"] 52 | 53 | def __init__(self, *args, **kwargs): 54 | requires_backends(self, ["flax", "transformers"]) 55 | 56 | @classmethod 57 | def from_config(cls, *args, **kwargs): 58 | requires_backends(cls, ["flax", "transformers"]) 59 | 60 | @classmethod 61 | def from_pretrained(cls, *args, **kwargs): 62 | requires_backends(cls, ["flax", "transformers"]) 63 | 
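The `dummy_*_objects.py` files in this directory (such as `dummy_flax_and_transformers_objects.py` above) all follow the same guard pattern: when an optional backend is missing, the top-level package exposes a `DummyObject` placeholder whose constructor calls `requires_backends`, which raises an informative `ImportError` only when the class is actually used. The following hedged sketch shows what that looks like from user code, assuming the usual `diffusers` availability helpers; it is illustrative, not part of the repository.

```python
# Illustration of the dummy-object pattern, assuming flax is NOT installed:
# the import still succeeds, but instantiation raises a descriptive ImportError.
from diffusers.utils import is_flax_available

if not is_flax_available():
    # With flax absent, this name resolves to a DummyObject placeholder class.
    from diffusers import FlaxStableDiffusionPipeline

    try:
        FlaxStableDiffusionPipeline()  # requires_backends(...) raises here
    except ImportError as err:
        print(f"optional backend missing: {err}")
```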
-------------------------------------------------------------------------------- /src/diffusers/utils/dummy_note_seq_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class MidiProcessor(metaclass=DummyObject): 6 | _backends = ["note_seq"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["note_seq"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["note_seq"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["note_seq"]) 18 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_onnx_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class OnnxRuntimeModel(metaclass=DummyObject): 6 | _backends = ["onnx"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["onnx"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["onnx"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["onnx"]) 18 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_torch_and_librosa_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class AudioDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["torch", "librosa"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "librosa"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "librosa"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "librosa"]) 18 | 19 | 20 | class Mel(metaclass=DummyObject): 21 | _backends = ["torch", "librosa"] 22 | 23 | def __init__(self, *args, **kwargs): 24 | requires_backends(self, ["torch", "librosa"]) 25 | 26 | @classmethod 27 | def from_config(cls, *args, **kwargs): 28 | requires_backends(cls, ["torch", "librosa"]) 29 | 30 | @classmethod 31 | def from_pretrained(cls, *args, **kwargs): 32 | requires_backends(cls, ["torch", "librosa"]) 33 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_torch_and_scipy_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class LMSDiscreteScheduler(metaclass=DummyObject): 6 | _backends = ["torch", "scipy"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "scipy"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "scipy"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "scipy"]) 18 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_torch_and_torchsde_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class DPMSolverSDEScheduler(metaclass=DummyObject): 6 | _backends = ["torch", "torchsde"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "torchsde"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "torchsde"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "torchsde"]) 18 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_torch_and_transformers_and_k_diffusion_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class StableDiffusionKDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["torch", "transformers", "k_diffusion"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "transformers", "k_diffusion"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "transformers", "k_diffusion"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "transformers", "k_diffusion"]) 18 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_transformers_and_torch_and_note_seq_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class SpectrogramDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["transformers", "torch", "note_seq"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["transformers", "torch", "note_seq"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["transformers", "torch", "note_seq"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["transformers", "torch", "note_seq"]) 18 | -------------------------------------------------------------------------------- /src/diffusers/utils/model_card_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | {{ card_data }} 3 | --- 4 | 5 | 7 | 8 | # {{ model_name | default("Diffusion Model") }} 9 | 10 | ## Model description 11 | 12 | This diffusion model is trained with the [🤗 Diffusers](https://github.com/huggingface/diffusers) library 13 | on the `{{ dataset_name }}` dataset. 14 | 15 | ## Intended uses & limitations 16 | 17 | #### How to use 18 | 19 | ```python 20 | # TODO: add an example code snippet for running this diffusion pipeline 21 | ``` 22 | 23 | #### Limitations and bias 24 | 25 | [TODO: provide examples of latent issues and potential remediations] 26 | 27 | ## Training data 28 | 29 | [TODO: describe the data used to train the model] 30 | 31 | ### Training hyperparameters 32 | 33 | The following hyperparameters were used during training: 34 | - learning_rate: {{ learning_rate }} 35 | - train_batch_size: {{ train_batch_size }} 36 | - eval_batch_size: {{ eval_batch_size }} 37 | - gradient_accumulation_steps: {{ gradient_accumulation_steps }} 38 | - optimizer: AdamW with betas=({{ adam_beta1 }}, {{ adam_beta2 }}), weight_decay={{ adam_weight_decay }} and epsilon={{ adam_epsilon }} 39 | - lr_scheduler: {{ lr_scheduler }} 40 | - lr_warmup_steps: {{ lr_warmup_steps }} 41 | - ema_inv_gamma: {{ ema_inv_gamma }} 42 | - ema_power: {{ ema_power }} 43 | - ema_max_decay: {{ ema_max_decay }} 44 | - mixed_precision: {{ mixed_precision }} 45 | 46 | ### Training results 47 | 48 | 📈 [TensorBoard logs](https://huggingface.co/{{ repo_name }}/tensorboard?#scalars) 49 | 50 | 51 | -------------------------------------------------------------------------------- /src/diffusers/utils/pil_utils.py: -------------------------------------------------------------------------------- 1 | import PIL.Image 2 | import PIL.ImageOps 3 | from packaging import version 4 | from PIL import Image 5 | 6 | 7 | if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"): 8 | PIL_INTERPOLATION = { 9 | "linear": PIL.Image.Resampling.BILINEAR, 10 | "bilinear": PIL.Image.Resampling.BILINEAR, 11 | "bicubic": PIL.Image.Resampling.BICUBIC, 12 | "lanczos": PIL.Image.Resampling.LANCZOS, 13 | "nearest": PIL.Image.Resampling.NEAREST, 14 | } 15 | else: 16 | PIL_INTERPOLATION = { 17 | "linear": PIL.Image.LINEAR, 18 | "bilinear": PIL.Image.BILINEAR, 19 | "bicubic": PIL.Image.BICUBIC, 20 | "lanczos": PIL.Image.LANCZOS, 21 | "nearest": PIL.Image.NEAREST, 22 | } 23 | 24 | 25 | def pt_to_pil(images): 26 | images = (images / 2 + 0.5).clamp(0, 1) 27 | images = images.cpu().permute(0, 2, 3, 1).float().numpy() 28 | images = numpy_to_pil(images) 29 | return images 30 | 31 | 32 | def numpy_to_pil(images): 33 | """ 34 | Convert a numpy image or a batch of images to a PIL image. 35 | """ 36 | if images.ndim == 3: 37 | images = images[None, ...]
38 | images = (images * 255).round().astype("uint8") 39 | if images.shape[-1] == 1: 40 | # special case for grayscale (single channel) images 41 | pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images] 42 | else: 43 | pil_images = [Image.fromarray(image) for image in images] 44 | 45 | return pil_images 46 | -------------------------------------------------------------------------------- /src/latent_diffusion/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/.DS_Store -------------------------------------------------------------------------------- /src/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/__init__.py -------------------------------------------------------------------------------- /src/latent_diffusion/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/__pycache__/util.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/__pycache__/util.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/models/__init__.py -------------------------------------------------------------------------------- /src/latent_diffusion/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/models/__pycache__/__init__.cpython-39.pyc 
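For reference, the `pt_to_pil` and `numpy_to_pil` helpers defined above in `src/diffusers/utils/pil_utils.py` expect diffusion outputs as `[B, C, H, W]` tensors in `[-1, 1]` and float arrays in `[0, 1]`, respectively. Below is a minimal usage sketch, assuming the FlowSep repository root is the working directory (so the vendored `src/diffusers` copy is importable) and that `torch`, `numpy`, and `Pillow` are installed:

```python
import sys

import torch

# Assumption: run from the repository root; insert at the front of sys.path so the
# vendored diffusers copy under src/ wins over any pip-installed diffusers.
sys.path.insert(0, "src")
from diffusers.utils.pil_utils import numpy_to_pil, pt_to_pil

# Fake batch of two 3-channel 64x64 images in [-1, 1], as a diffusion decoder would emit.
batch = torch.rand(2, 3, 64, 64) * 2.0 - 1.0

pil_images = pt_to_pil(batch)  # rescales to [0, 1], moves channels last, converts to PIL
print(len(pil_images), pil_images[0].size)  # -> 2 (64, 64)

# numpy_to_pil also accepts a single HxWxC float array in [0, 1].
single = numpy_to_pil(batch[0].add(1).div(2).permute(1, 2, 0).numpy())[0]
single.save("pil_utils_demo.png")  # hypothetical output path, for illustration only
```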
-------------------------------------------------------------------------------- /src/latent_diffusion/models/__pycache__/ddim.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/models/__pycache__/ddim.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/models/__pycache__/ddim.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/models/__pycache__/ddim.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/models/__pycache__/ddpm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/models/__pycache__/ddpm.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/models/__pycache__/ddpm.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/models/__pycache__/ddpm.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/models/__pycache__/ddpm_flow.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/models/__pycache__/ddpm_flow.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/models/__pycache__/plms.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/models/__pycache__/plms.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/models/__pycache__/plms.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/models/__pycache__/plms.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/models/dpm_solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import DPMSolverSampler 2 | -------------------------------------------------------------------------------- /src/latent_diffusion/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/__init__.py -------------------------------------------------------------------------------- /src/latent_diffusion/modules/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/__pycache__/attention.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/__pycache__/attention.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/__pycache__/attention.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/__pycache__/attention.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/__pycache__/ema.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/__pycache__/ema.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/__pycache__/ema.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/__pycache__/ema.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/README.md: -------------------------------------------------------------------------------- 1 | # A simple use of Audio Masked AutoEncoder (AudioMAE) 2 | Reference code: https://github.com/facebookresearch/AudioMAE 3 | 4 | Paper: https://arxiv.org/abs/2207.06405 5 | 6 | Install the required python packages: 7 | ``` 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | 12 | See the usage in example.py 13 | 14 | 15 | 16 | ``` 17 | python example.py 18 | 19 | """ 20 | Load AudioMAE from /mnt/bn/data-xubo/project/Masked_AudioEncoder checkpoint/finetuned.pth / message: 21 | Start evaluation on AudioSet ...
22 | mAP: 0.463003 23 | """ 24 | ``` -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/__init__.py -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/__pycache__/AudioMAE.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/__pycache__/AudioMAE.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/__pycache__/AudioMAE.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/__pycache__/AudioMAE.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/__pycache__/models_mae.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/__pycache__/models_mae.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/__pycache__/models_mae.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/__pycache__/models_mae.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/__pycache__/models_vit.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/__pycache__/models_vit.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/__pycache__/models_vit.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/__pycache__/models_vit.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/example.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from timm.models.layers import to_2tuple 5 | import models_vit 6 | from audiovisual_dataset import AudioVisualDataset, collate_fn 7 | from torch.utils.data import DataLoader 8 | from util.stat import calculate_stats 9 | from tqdm import tqdm 10 | from AudioMAE import AudioMAE 11 | 12 | if __name__ == '__main__': 13 | device = 'cuda' 14 | dataset = AudioVisualDataset( 15 | datafiles=['/mnt/bn/data-xubo/dataset/audioset_videos/datafiles/audioset_eval.json'], 16 | # disable SpecAug during evaluation 17 | freqm=0, 18 | timem=0, 19 | return_label=True 20 | ) 21 | 22 | model = AudioMAE().to(device) 23 | model.eval() 24 | 25 | outputs=[] 26 | targets=[] 27 | 28 | dataloader = DataLoader(dataset, batch_size=64, num_workers=8, shuffle=False, collate_fn=collate_fn) 29 | 30 | print('Start evaluation on AudioSet ...') 31 | with torch.no_grad(): 32 | for data in tqdm(dataloader): 33 | fbank = data['fbank'] # [B, 1, T, F] 34 | fbank = fbank.to(device) 35 | output = model(fbank, mask_t_prob=0.0, mask_f_prob=0.0) 36 | target = data['labels'] 37 | outputs.append(output) 38 | targets.append(target) 39 | 40 | outputs=torch.cat(outputs).cpu().numpy() 41 | targets=torch.cat(targets).cpu().numpy() 42 | stats = calculate_stats(outputs, targets) 43 | 44 | AP = [stat['AP'] for stat in stats] 45 | mAP = np.mean([stat['AP'] for stat in stats]) 46 | print("Done ... 
mAP: {:.6f}".format(mAP)) 47 | 48 | # mAP: 0.463003 -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/requirements.txt: -------------------------------------------------------------------------------- 1 | ipdb 2 | tqdm 3 | decord 4 | pandas 5 | scipy 6 | scikit-learn 7 | timm==0.3.2 8 | torchaudio==0.8.1 9 | torch==1.8.1 -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/util/__pycache__/patch_embed.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/util/__pycache__/patch_embed.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/util/__pycache__/patch_embed.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/util/__pycache__/patch_embed.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/util/__pycache__/pos_embed.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/util/__pycache__/pos_embed.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/util/__pycache__/pos_embed.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/audiomae/util/__pycache__/pos_embed.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/util/crop.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | 9 | import torch 10 | 11 | from torchvision import transforms 12 | from torchvision.transforms import functional as F 13 | 14 | 15 | class RandomResizedCrop(transforms.RandomResizedCrop): 16 | """ 17 | RandomResizedCrop for matching TF/TPU implementation: no for-loop is used. 18 | This may lead to results different with torchvision's version. 
19 | Following BYOL's TF code: 20 | https://github.com/deepmind/deepmind-research/blob/master/byol/utils/dataset.py#L206 21 | """ 22 | @staticmethod 23 | def get_params(img, scale, ratio): 24 | width, height = F._get_image_size(img) 25 | area = height * width 26 | 27 | target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item() 28 | log_ratio = torch.log(torch.tensor(ratio)) 29 | aspect_ratio = torch.exp( 30 | torch.empty(1).uniform_(log_ratio[0], log_ratio[1]) 31 | ).item() 32 | 33 | w = int(round(math.sqrt(target_area * aspect_ratio))) 34 | h = int(round(math.sqrt(target_area / aspect_ratio))) 35 | 36 | w = min(w, width) 37 | h = min(h, height) 38 | 39 | i = torch.randint(0, height - h + 1, size=(1,)).item() 40 | j = torch.randint(0, width - w + 1, size=(1,)).item() 41 | 42 | return i, j, h, w -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/util/datasets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # -------------------------------------------------------- 7 | # References: 8 | # DeiT: https://github.com/facebookresearch/deit 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import PIL 13 | 14 | from torchvision import datasets, transforms 15 | 16 | from timm.data import create_transform 17 | from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 18 | 19 | 20 | def build_dataset(is_train, args): 21 | transform = build_transform(is_train, args) 22 | 23 | root = os.path.join(args.data_path, 'train' if is_train else 'val') 24 | dataset = datasets.ImageFolder(root, transform=transform) 25 | 26 | print(dataset) 27 | 28 | return dataset 29 | 30 | 31 | def build_transform(is_train, args): 32 | mean = IMAGENET_DEFAULT_MEAN 33 | std = IMAGENET_DEFAULT_STD 34 | # train transform 35 | if is_train: 36 | # this should always dispatch to transforms_imagenet_train 37 | transform = create_transform( 38 | input_size=args.input_size, 39 | is_training=True, 40 | color_jitter=args.color_jitter, 41 | auto_augment=args.aa, 42 | interpolation='bicubic', 43 | re_prob=args.reprob, 44 | re_mode=args.remode, 45 | re_count=args.recount, 46 | mean=mean, 47 | std=std, 48 | ) 49 | return transform 50 | 51 | # eval transform 52 | t = [] 53 | if args.input_size <= 224: 54 | crop_pct = 224 / 256 55 | else: 56 | crop_pct = 1.0 57 | size = int(args.input_size / crop_pct) 58 | t.append( 59 | transforms.Resize(size, interpolation=PIL.Image.BICUBIC), # to maintain same ratio w.r.t. 224 images 60 | ) 61 | t.append(transforms.CenterCrop(args.input_size)) 62 | 63 | t.append(transforms.ToTensor()) 64 | t.append(transforms.Normalize(mean, std)) 65 | return transforms.Compose(t) 66 | -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/util/lars.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # -------------------------------------------------------- 7 | # LARS optimizer, implementation from MoCo v3: 8 | # https://github.com/facebookresearch/moco-v3 9 | # -------------------------------------------------------- 10 | 11 | import torch 12 | 13 | 14 | class LARS(torch.optim.Optimizer): 15 | """ 16 | LARS optimizer, no rate scaling or weight decay for parameters <= 1D. 17 | """ 18 | def __init__(self, params, lr=0, weight_decay=0, momentum=0.9, trust_coefficient=0.001): 19 | defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum, trust_coefficient=trust_coefficient) 20 | super().__init__(params, defaults) 21 | 22 | @torch.no_grad() 23 | def step(self): 24 | for g in self.param_groups: 25 | for p in g['params']: 26 | dp = p.grad 27 | 28 | if dp is None: 29 | continue 30 | 31 | if p.ndim > 1: # if not normalization gamma/beta or bias 32 | dp = dp.add(p, alpha=g['weight_decay']) 33 | param_norm = torch.norm(p) 34 | update_norm = torch.norm(dp) 35 | one = torch.ones_like(param_norm) 36 | q = torch.where(param_norm > 0., 37 | torch.where(update_norm > 0, 38 | (g['trust_coefficient'] * param_norm / update_norm), one), 39 | one) 40 | dp = dp.mul(q) 41 | 42 | param_state = self.state[p] 43 | if 'mu' not in param_state: 44 | param_state['mu'] = torch.zeros_like(p) 45 | mu = param_state['mu'] 46 | mu.mul_(g['momentum']).add_(dp) 47 | p.add_(mu, alpha=-g['lr']) -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/util/lr_decay.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # -------------------------------------------------------- 7 | # References: 8 | # ELECTRA https://github.com/google-research/electra 9 | # BEiT: https://github.com/microsoft/unilm/tree/master/beit 10 | # -------------------------------------------------------- 11 | 12 | import json 13 | 14 | 15 | def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=[], layer_decay=.75): 16 | """ 17 | Parameter groups for layer-wise lr decay 18 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58 19 | """ 20 | param_group_names = {} 21 | param_groups = {} 22 | 23 | num_layers = len(model.blocks) + 1 24 | 25 | layer_scales = list(layer_decay ** (num_layers - i) for i in range(num_layers + 1)) 26 | 27 | for n, p in model.named_parameters(): 28 | if not p.requires_grad: 29 | continue 30 | 31 | # no decay: all 1D parameters and model specific ones 32 | if p.ndim == 1 or n in no_weight_decay_list: 33 | g_decay = "no_decay" 34 | this_decay = 0. 
35 | else: 36 | g_decay = "decay" 37 | this_decay = weight_decay 38 | 39 | layer_id = get_layer_id_for_vit(n, num_layers) 40 | group_name = "layer_%d_%s" % (layer_id, g_decay) 41 | 42 | if group_name not in param_group_names: 43 | this_scale = layer_scales[layer_id] 44 | 45 | param_group_names[group_name] = { 46 | "lr_scale": this_scale, 47 | "weight_decay": this_decay, 48 | "params": [], 49 | } 50 | param_groups[group_name] = { 51 | "lr_scale": this_scale, 52 | "weight_decay": this_decay, 53 | "params": [], 54 | } 55 | 56 | param_group_names[group_name]["params"].append(n) 57 | param_groups[group_name]["params"].append(p) 58 | 59 | # print("parameter groups: \n%s" % json.dumps(param_group_names, indent=2)) 60 | 61 | return list(param_groups.values()) 62 | 63 | 64 | def get_layer_id_for_vit(name, num_layers): 65 | """ 66 | Assign a parameter with its layer id 67 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33 68 | """ 69 | if name in ['cls_token', 'pos_embed']: 70 | return 0 71 | elif name.startswith('patch_embed'): 72 | return 0 73 | elif name.startswith('blocks'): 74 | return int(name.split('.')[1]) + 1 75 | else: 76 | return num_layers -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/util/lr_sched.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | 9 | def adjust_learning_rate(optimizer, epoch, args): 10 | """Decay the learning rate with half-cycle cosine after warmup""" 11 | if epoch < args.warmup_epochs: 12 | lr = args.lr * epoch / args.warmup_epochs 13 | else: 14 | lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * \ 15 | (1. + math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs))) 16 | for param_group in optimizer.param_groups: 17 | if "lr_scale" in param_group: 18 | param_group["lr"] = lr * param_group["lr_scale"] 19 | else: 20 | param_group["lr"] = lr 21 | return lr 22 | -------------------------------------------------------------------------------- /src/latent_diffusion/modules/audiomae/util/stat.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import stats 3 | from sklearn import metrics 4 | import torch 5 | 6 | def d_prime(auc): 7 | standard_normal = stats.norm() 8 | d_prime = standard_normal.ppf(auc) * np.sqrt(2.0) 9 | return d_prime 10 | 11 | @torch.no_grad() 12 | def concat_all_gather(tensor): 13 | """ 14 | Performs all_gather operation on the provided tensors. 15 | *** Warning ***: torch.distributed.all_gather has no gradient. 16 | """ 17 | tensors_gather = [torch.ones_like(tensor) 18 | for _ in range(torch.distributed.get_world_size())] 19 | torch.distributed.all_gather(tensors_gather, tensor, async_op=False) 20 | 21 | output = torch.cat(tensors_gather, dim=0) 22 | return output 23 | 24 | def calculate_stats(output, target): 25 | """Calculate statistics including mAP, AUC, etc. 26 | 27 | Args: 28 | output: 2d array, (samples_num, classes_num) 29 | target: 2d array, (samples_num, classes_num) 30 | 31 | Returns: 32 | stats: list of statistic of each class. 
33 | """ 34 | 35 | classes_num = target.shape[-1] 36 | stats = [] 37 | 38 | # Accuracy, only used for single-label classification such as esc-50, not for multiple label one such as AudioSet 39 | acc = metrics.accuracy_score(np.argmax(target, 1), np.argmax(output, 1)) 40 | 41 | # Class-wise statistics 42 | for k in range(classes_num): 43 | 44 | # Average precision 45 | avg_precision = metrics.average_precision_score( 46 | target[:, k], output[:, k], average=None) 47 | 48 | # AUC 49 | # auc = metrics.roc_auc_score(target[:, k], output[:, k], average=None) 50 | 51 | # Precisions, recalls 52 | (precisions, recalls, thresholds) = metrics.precision_recall_curve( 53 | target[:, k], output[:, k]) 54 | 55 | # FPR, TPR 56 | (fpr, tpr, thresholds) = metrics.roc_curve(target[:, k], output[:, k]) 57 | 58 | save_every_steps = 1000 # Sample statistics to reduce size 59 | dict = {'precisions': precisions[0::save_every_steps], 60 | 'recalls': recalls[0::save_every_steps], 61 | 'AP': avg_precision, 62 | 'fpr': fpr[0::save_every_steps], 63 | 'fnr': 1. - tpr[0::save_every_steps], 64 | # 'auc': auc, 65 | # note acc is not class-wise, this is just to keep consistent with other metrics 66 | 'acc': acc 67 | } 68 | stats.append(dict) 69 | 70 | return stats -------------------------------------------------------------------------------- /src/latent_diffusion/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /src/latent_diffusion/modules/diffusionmodules/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/diffusionmodules/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/diffusionmodules/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/diffusionmodules/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/diffusionmodules/__pycache__/model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/diffusionmodules/__pycache__/model.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/diffusionmodules/__pycache__/model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/diffusionmodules/__pycache__/model.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/diffusionmodules/__pycache__/openaimodel.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/diffusionmodules/__pycache__/openaimodel.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/diffusionmodules/__pycache__/openaimodel.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/diffusionmodules/__pycache__/openaimodel.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/diffusionmodules/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/diffusionmodules/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/diffusionmodules/__pycache__/util.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/diffusionmodules/__pycache__/util.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/distributions/__init__.py -------------------------------------------------------------------------------- /src/latent_diffusion/modules/distributions/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/distributions/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/distributions/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/distributions/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/distributions/__pycache__/distributions.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/distributions/__pycache__/distributions.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/distributions/__pycache__/distributions.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/distributions/__pycache__/distributions.cpython-39.pyc 
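As an aside, the `calculate_stats` helper shown earlier in `src/latent_diffusion/modules/audiomae/util/stat.py` derives per-class average precision (and hence mAP) from multi-hot targets and per-class scores; this is what `example.py` uses to report the AudioSet mAP. The sketch below exercises it on synthetic data, assuming the repository root is the working directory and that `numpy`, `scipy`, `scikit-learn`, and `torch` are installed (the array shapes and seed are purely illustrative):

```python
import sys

import numpy as np

# Assumption: run from the repository root so `util` resolves as a namespace package,
# mirroring the `from util.stat import calculate_stats` import used by example.py.
sys.path.append("src/latent_diffusion/modules/audiomae")
from util.stat import calculate_stats

rng = np.random.default_rng(0)
targets = rng.integers(0, 2, size=(32, 5)).astype(float)                        # multi-hot labels: 32 clips, 5 classes
scores = np.clip(targets + rng.normal(0.0, 0.4, size=targets.shape), 0.0, 1.0)  # noisy per-class scores

stats = calculate_stats(scores, targets)    # list with one dict of metrics per class
mAP = np.mean([s["AP"] for s in stats])
print(f"mAP on synthetic data: {mAP:.3f}")  # high, since the scores track the labels
```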
-------------------------------------------------------------------------------- /src/latent_diffusion/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/encoders/__init__.py -------------------------------------------------------------------------------- /src/latent_diffusion/modules/encoders/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/encoders/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/encoders/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/encoders/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/encoders/__pycache__/modules.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/encoders/__pycache__/modules.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/encoders/__pycache__/modules.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/encoders/__pycache__/modules.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .contperceptual import LPIPSWithDiscriminator 2 | from .waveform_contperceptual import WaveformDiscriminatorLoss 3 | from .waveform_contperceptual_panns import WaveformPANNsDiscriminatorLoss 4 | -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/__pycache__/contperceptual.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/__pycache__/contperceptual.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/__pycache__/contperceptual.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/__pycache__/contperceptual.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/__pycache__/waveform_contperceptual.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/__pycache__/waveform_contperceptual.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/__pycache__/waveform_contperceptual.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/__pycache__/waveform_contperceptual.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/__pycache__/waveform_contperceptual_panns.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/__pycache__/waveform_contperceptual_panns.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/__pycache__/waveform_contperceptual_panns.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/__pycache__/waveform_contperceptual_panns.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/panns_distance/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/panns_distance/__init__.py -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/panns_distance/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/panns_distance/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/panns_distance/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/panns_distance/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/panns_distance/__pycache__/distance.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/panns_distance/__pycache__/distance.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/panns_distance/__pycache__/distance.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/panns_distance/__pycache__/distance.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/panns_distance/distance.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append( 4 | "/mnt/fast/nobackup/users/hl01486/projects/general_audio_generation/stable_diffusion_for_audio" 5 | ) 6 | 7 | import torch.nn as nn 8 | import torch 9 | import os 10 | import sys 11 | 12 | from latent_diffusion.modules.losses.panns_distance.model.models import Cnn14_16k 13 | 14 | MODEL = "Cnn14_16k" 15 | CHECKPOINT_PATH = "Cnn14_16k_mAP=0.438.pth" 16 | cmd_download_ckpt = ( 17 | "wget -O " 18 | + CHECKPOINT_PATH 19 | + " https://zenodo.org/record/3987831/files/Cnn14_16k_mAP%3D0.438.pth?download=1" 20 | ) 21 | 22 | 23 | class Panns_distance(nn.Module): 24 | def __init__(self, device="cpu", metric="cos"): 25 | super(Panns_distance, self).__init__() 26 | self.panns = Cnn14_16k() 27 | if not os.path.exists(CHECKPOINT_PATH): 28 | print(cmd_download_ckpt) 29 | os.system(cmd_download_ckpt) 30 | 31 | checkpoint = torch.load(CHECKPOINT_PATH, map_location=device) 32 | self.metric = metric 33 | self.cos = nn.CosineSimilarity(dim=1, eps=1e-6) 34 | self.panns.load_state_dict(checkpoint["model"]) 35 | # Freeze PANNs parameters 36 | self.panns.eval() 37 | for p in self.panns.parameters(): 38 | p.requires_grad = False 39 | 40 | def calculate(self, fm, fm_hat): 41 | distance = [] 42 | for i, j in zip(fm, fm_hat): 43 | if self.metric == "cos": 44 | i = i.reshape(i.size(0), -1) 45 | j = j.reshape(j.size(0), -1) 46 | distance.append(self.cos(i, j)[..., None]) 47 | else: 48 | distance.append(torch.mean(torch.abs(i - j))) 49 | if self.metric == "cos": 50 | distance = torch.cat(distance, dim=-1) 51 | return torch.mean(distance) 52 | else: 53 | return torch.mean(torch.tensor(distance)) 54 | 55 | def forward(self, y, y_hat): 56 | # y: [batch, samples] 57 | # if y.size() != y_hat.size(): 58 | # print(str(y.size()) + " " + str(y_hat.size())) 59 | if y.size() != y_hat.size(): 60 | min_length = min(y.size(-1), y_hat.size(-1)) 61 | y = y[..., :min_length] 62 | y_hat = y_hat[..., :min_length] 63 | ret_dict = self.panns(y, None) 64 | ret_dict_hat = self.panns(y_hat, None) 65 | return ret_dict["feature_maps"], ret_dict_hat["feature_maps"] 66 | 67 | 68 | if __name__ == "__main__": 69 | distance = Panns_distance(metric="mean") 70 | y = torch.randn((4, 110250)) 71 | y_hat = torch.randn((4, 110080)) 72 | f1, f2 = distance(y, y_hat) 73 | 
print(distance.calculate(f1, f2)) 74 | -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/panns_distance/model/__pycache__/models.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/panns_distance/model/__pycache__/models.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/panns_distance/model/__pycache__/models.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/panns_distance/model/__pycache__/models.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/panns_distance/model/__pycache__/pytorch_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/panns_distance/model/__pycache__/pytorch_utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/panns_distance/model/__pycache__/pytorch_utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/losses/panns_distance/model/__pycache__/pytorch_utils.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/panns_distance/model/evaluate.py: -------------------------------------------------------------------------------- 1 | from sklearn import metrics 2 | 3 | from pytorch_utils import forward 4 | 5 | 6 | class Evaluator(object): 7 | def __init__(self, model): 8 | """Evaluator. 9 | 10 | Args: 11 | model: object 12 | """ 13 | self.model = model 14 | 15 | def evaluate(self, data_loader): 16 | """Forward evaluation data and calculate statistics. 
17 | 18 | Args: 19 | data_loader: object 20 | 21 | Returns: 22 | statistics: dict, 23 | {'average_precision': (classes_num,), 'auc': (classes_num,)} 24 | """ 25 | 26 | # Forward 27 | output_dict = forward( 28 | model=self.model, generator=data_loader, return_target=True 29 | ) 30 | 31 | clipwise_output = output_dict["clipwise_output"] # (audios_num, classes_num) 32 | target = output_dict["target"] # (audios_num, classes_num) 33 | 34 | average_precision = metrics.average_precision_score( 35 | target, clipwise_output, average=None 36 | ) 37 | 38 | auc = metrics.roc_auc_score(target, clipwise_output, average=None) 39 | 40 | statistics = {"average_precision": average_precision, "auc": auc} 41 | 42 | return statistics 43 | -------------------------------------------------------------------------------- /src/latent_diffusion/modules/losses/panns_distance/model/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def clip_bce(output_dict, target_dict): 6 | """Binary crossentropy loss.""" 7 | return F.binary_cross_entropy(output_dict["clipwise_output"], target_dict["target"]) 8 | 9 | 10 | def get_loss_func(loss_type): 11 | if loss_type == "clip_bce": 12 | return clip_bce 13 | -------------------------------------------------------------------------------- /src/latent_diffusion/modules/phoneme_encoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/phoneme_encoder/__init__.py -------------------------------------------------------------------------------- /src/latent_diffusion/modules/phoneme_encoder/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/phoneme_encoder/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/phoneme_encoder/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/phoneme_encoder/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/phoneme_encoder/__pycache__/attentions.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/phoneme_encoder/__pycache__/attentions.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/phoneme_encoder/__pycache__/attentions.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/phoneme_encoder/__pycache__/attentions.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/phoneme_encoder/__pycache__/commons.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/phoneme_encoder/__pycache__/commons.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/phoneme_encoder/__pycache__/commons.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/phoneme_encoder/__pycache__/commons.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/phoneme_encoder/__pycache__/encoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/phoneme_encoder/__pycache__/encoder.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/phoneme_encoder/__pycache__/encoder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_diffusion/modules/phoneme_encoder/__pycache__/encoder.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_diffusion/modules/phoneme_encoder/encoder.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import math 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | import latent_diffusion.modules.phoneme_encoder.commons as commons 8 | import latent_diffusion.modules.phoneme_encoder.attentions as attentions 9 | 10 | class TextEncoder(nn.Module): 11 | def __init__(self, 12 | n_vocab, 13 | out_channels=192, 14 | hidden_channels=192, 15 | filter_channels=768, 16 | n_heads=2, 17 | n_layers=6, 18 | kernel_size=3, 19 | p_dropout=0.1): 20 | super().__init__() 21 | self.n_vocab = n_vocab 22 | self.out_channels = out_channels 23 | self.hidden_channels = hidden_channels 24 | self.filter_channels = filter_channels 25 | self.n_heads = n_heads 26 | self.n_layers = n_layers 27 | self.kernel_size = kernel_size 28 | self.p_dropout = p_dropout 29 | 30 | self.emb = nn.Embedding(n_vocab, hidden_channels) 31 | nn.init.normal_(self.emb.weight, 0.0, hidden_channels**-0.5) 32 | 33 | self.encoder = attentions.Encoder( 34 | hidden_channels, 35 | filter_channels, 36 | n_heads, 37 | n_layers, 38 | kernel_size, 39 | p_dropout) 40 | self.proj= nn.Conv1d(hidden_channels, out_channels * 2, 1) 41 | 42 | def forward(self, x, x_lengths): 43 | x = self.emb(x) * math.sqrt(self.hidden_channels) # [b, t, h] 44 | x = torch.transpose(x, 1, -1) # [b, h, t] 45 | x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) 46 | 47 | x = self.encoder(x * x_mask, x_mask) 48 | stats = self.proj(x) * x_mask 49 | 50 | m, logs = torch.split(stats, self.out_channels, dim=1) 51 | return x, m, logs, x_mask -------------------------------------------------------------------------------- /src/latent_encoder/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/__init__.py -------------------------------------------------------------------------------- /src/latent_encoder/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_encoder/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_encoder/__pycache__/activations.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/__pycache__/activations.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_encoder/__pycache__/autoencoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/__pycache__/autoencoder.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_encoder/__pycache__/autoencoder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/__pycache__/autoencoder.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_encoder/__pycache__/drumencoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/__pycache__/drumencoder.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_encoder/__pycache__/wavencoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/__pycache__/wavencoder.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_encoder/alias_free_torch/__init__.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0 2 | # LICENSE is in incl_licenses directory. 
3 | 4 | from .filter import * 5 | from .resample import * 6 | from .act import * -------------------------------------------------------------------------------- /src/latent_encoder/alias_free_torch/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/alias_free_torch/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_encoder/alias_free_torch/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/alias_free_torch/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_encoder/alias_free_torch/__pycache__/act.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/alias_free_torch/__pycache__/act.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_encoder/alias_free_torch/__pycache__/act.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/alias_free_torch/__pycache__/act.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_encoder/alias_free_torch/__pycache__/filter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/alias_free_torch/__pycache__/filter.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_encoder/alias_free_torch/__pycache__/filter.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/alias_free_torch/__pycache__/filter.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_encoder/alias_free_torch/__pycache__/resample.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/alias_free_torch/__pycache__/resample.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_encoder/alias_free_torch/__pycache__/resample.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/alias_free_torch/__pycache__/resample.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_encoder/alias_free_torch/act.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache 
License 2.0 2 | # LICENSE is in incl_licenses directory. 3 | 4 | import torch.nn as nn 5 | from .resample import UpSample1d, DownSample1d 6 | 7 | 8 | class Activation1d(nn.Module): 9 | def __init__(self, 10 | activation, 11 | up_ratio: int = 2, 12 | down_ratio: int = 2, 13 | up_kernel_size: int = 12, 14 | down_kernel_size: int = 12): 15 | super().__init__() 16 | self.up_ratio = up_ratio 17 | self.down_ratio = down_ratio 18 | self.act = activation 19 | self.upsample = UpSample1d(up_ratio, up_kernel_size) 20 | self.downsample = DownSample1d(down_ratio, down_kernel_size) 21 | 22 | # x: [B,C,T] 23 | def forward(self, x): 24 | x = self.upsample(x) 25 | x = self.act(x) 26 | x = self.downsample(x) 27 | 28 | return x -------------------------------------------------------------------------------- /src/latent_encoder/alias_free_torch/resample.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0 2 | # LICENSE is in incl_licenses directory. 3 | 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | from .filter import LowPassFilter1d 7 | from .filter import kaiser_sinc_filter1d 8 | 9 | 10 | class UpSample1d(nn.Module): 11 | def __init__(self, ratio=2, kernel_size=None): 12 | super().__init__() 13 | self.ratio = ratio 14 | self.kernel_size = int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size 15 | self.stride = ratio 16 | self.pad = self.kernel_size // ratio - 1 17 | self.pad_left = self.pad * self.stride + (self.kernel_size - self.stride) // 2 18 | self.pad_right = self.pad * self.stride + (self.kernel_size - self.stride + 1) // 2 19 | filter = kaiser_sinc_filter1d(cutoff=0.5 / ratio, 20 | half_width=0.6 / ratio, 21 | kernel_size=self.kernel_size) 22 | self.register_buffer("filter", filter) 23 | 24 | # x: [B, C, T] 25 | def forward(self, x): 26 | _, C, _ = x.shape 27 | 28 | x = F.pad(x, (self.pad, self.pad), mode='replicate') 29 | x = self.ratio * F.conv_transpose1d( 30 | x, self.filter.expand(C, -1, -1), stride=self.stride, groups=C) 31 | x = x[..., self.pad_left:-self.pad_right] 32 | 33 | return x 34 | 35 | 36 | class DownSample1d(nn.Module): 37 | def __init__(self, ratio=2, kernel_size=None): 38 | super().__init__() 39 | self.ratio = ratio 40 | self.kernel_size = int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size 41 | self.lowpass = LowPassFilter1d(cutoff=0.5 / ratio, 42 | half_width=0.6 / ratio, 43 | stride=ratio, 44 | kernel_size=self.kernel_size) 45 | 46 | def forward(self, x): 47 | xx = self.lowpass(x) 48 | 49 | return xx -------------------------------------------------------------------------------- /src/latent_encoder/wavedecoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .decoder import * 2 | -------------------------------------------------------------------------------- /src/latent_encoder/wavedecoder/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/wavedecoder/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_encoder/wavedecoder/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/wavedecoder/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/latent_encoder/wavedecoder/__pycache__/decoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/wavedecoder/__pycache__/decoder.cpython-38.pyc -------------------------------------------------------------------------------- /src/latent_encoder/wavedecoder/__pycache__/decoder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/latent_encoder/wavedecoder/__pycache__/decoder.cpython-39.pyc -------------------------------------------------------------------------------- /src/utilities/__init__.py: -------------------------------------------------------------------------------- 1 | from .tools import * 2 | from .data import * 3 | from .model import * 4 | -------------------------------------------------------------------------------- /src/utilities/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/utilities/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/utilities/__pycache__/model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/__pycache__/model.cpython-38.pyc -------------------------------------------------------------------------------- /src/utilities/__pycache__/model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/__pycache__/model.cpython-39.pyc -------------------------------------------------------------------------------- /src/utilities/__pycache__/tools.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/__pycache__/tools.cpython-38.pyc -------------------------------------------------------------------------------- /src/utilities/__pycache__/tools.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/__pycache__/tools.cpython-39.pyc -------------------------------------------------------------------------------- /src/utilities/audio/__init__.py: -------------------------------------------------------------------------------- 1 | from .audio_processing import 
* 2 | from .stft import * 3 | from .tools import * 4 | -------------------------------------------------------------------------------- /src/utilities/audio/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/audio/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/utilities/audio/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/audio/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/utilities/audio/__pycache__/audio_processing.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/audio/__pycache__/audio_processing.cpython-38.pyc -------------------------------------------------------------------------------- /src/utilities/audio/__pycache__/audio_processing.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/audio/__pycache__/audio_processing.cpython-39.pyc -------------------------------------------------------------------------------- /src/utilities/audio/__pycache__/stft.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/audio/__pycache__/stft.cpython-38.pyc -------------------------------------------------------------------------------- /src/utilities/audio/__pycache__/stft.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/audio/__pycache__/stft.cpython-39.pyc -------------------------------------------------------------------------------- /src/utilities/audio/__pycache__/tools.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/audio/__pycache__/tools.cpython-38.pyc -------------------------------------------------------------------------------- /src/utilities/audio/__pycache__/tools.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/audio/__pycache__/tools.cpython-39.pyc -------------------------------------------------------------------------------- /src/utilities/audio/audio_processing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import librosa.util as librosa_util 4 | from scipy.signal import get_window 5 | 6 | 7 | def window_sumsquare( 8 | window, 9 | n_frames, 10 | hop_length, 11 | win_length, 12 | n_fft, 13 | dtype=np.float32, 14 | norm=None, 15 | ): 16 | """ 17 | # from librosa 0.6 18 | Compute the sum-square 
envelope of a window function at a given hop length. 19 | 20 | This is used to estimate modulation effects induced by windowing 21 | observations in short-time fourier transforms. 22 | 23 | Parameters 24 | ---------- 25 | window : string, tuple, number, callable, or list-like 26 | Window specification, as in `get_window` 27 | 28 | n_frames : int > 0 29 | The number of analysis frames 30 | 31 | hop_length : int > 0 32 | The number of samples to advance between frames 33 | 34 | win_length : [optional] 35 | The length of the window function. By default, this matches `n_fft`. 36 | 37 | n_fft : int > 0 38 | The length of each analysis frame. 39 | 40 | dtype : np.dtype 41 | The data type of the output 42 | 43 | Returns 44 | ------- 45 | wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))` 46 | The sum-squared envelope of the window function 47 | """ 48 | if win_length is None: 49 | win_length = n_fft 50 | 51 | n = n_fft + hop_length * (n_frames - 1) 52 | x = np.zeros(n, dtype=dtype) 53 | 54 | # Compute the squared window at the desired length 55 | win_sq = get_window(window, win_length, fftbins=True) 56 | win_sq = librosa_util.normalize(win_sq, norm=norm) ** 2 57 | win_sq = librosa_util.pad_center(win_sq, n_fft) 58 | 59 | # Fill the envelope 60 | for i in range(n_frames): 61 | sample = i * hop_length 62 | x[sample : min(n, sample + n_fft)] += win_sq[: max(0, min(n_fft, n - sample))] 63 | return x 64 | 65 | 66 | def griffin_lim(magnitudes, stft_fn, n_iters=30): 67 | """ 68 | PARAMS 69 | ------ 70 | magnitudes: spectrogram magnitudes 71 | stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods 72 | """ 73 | 74 | angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size()))) 75 | angles = angles.astype(np.float32) 76 | angles = torch.autograd.Variable(torch.from_numpy(angles)) 77 | signal = stft_fn.inverse(magnitudes, angles).squeeze(1) 78 | 79 | for i in range(n_iters): 80 | _, angles = stft_fn.transform(signal) 81 | signal = stft_fn.inverse(magnitudes, angles).squeeze(1) 82 | return signal 83 | 84 | 85 | def dynamic_range_compression(x, normalize_fun=torch.log, C=1, clip_val=1e-5): 86 | """ 87 | PARAMS 88 | ------ 89 | C: compression factor 90 | """ 91 | return normalize_fun(torch.clamp(x, min=clip_val) * C) 92 | 93 | 94 | def dynamic_range_decompression(x, C=1): 95 | """ 96 | PARAMS 97 | ------ 98 | C: compression factor used to compress 99 | """ 100 | return torch.exp(x) / C 101 | -------------------------------------------------------------------------------- /src/utilities/audio/tools.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from scipy.io.wavfile import write 4 | import torchaudio 5 | 6 | from utilities.audio.audio_processing import griffin_lim 7 | 8 | 9 | def get_mel_from_wav(audio, _stft): 10 | audio = torch.clip(torch.FloatTensor(audio).unsqueeze(0), -1, 1) 11 | audio = torch.autograd.Variable(audio, requires_grad=False) 12 | melspec, magnitudes, phases, energy = _stft.mel_spectrogram(audio) 13 | melspec = torch.squeeze(melspec, 0).numpy().astype(np.float32) 14 | magnitudes = torch.squeeze(magnitudes, 0).numpy().astype(np.float32) 15 | energy = torch.squeeze(energy, 0).numpy().astype(np.float32) 16 | return melspec, magnitudes, energy 17 | 18 | 19 | def inv_mel_spec(mel, out_filename, _stft, griffin_iters=60): 20 | mel = torch.stack([mel]) 21 | mel_decompress = _stft.spectral_de_normalize(mel) 22 | mel_decompress = mel_decompress.transpose(1, 2).data.cpu() 23 | 
spec_from_mel_scaling = 1000 24 | spec_from_mel = torch.mm(mel_decompress[0], _stft.mel_basis) 25 | spec_from_mel = spec_from_mel.transpose(0, 1).unsqueeze(0) 26 | spec_from_mel = spec_from_mel * spec_from_mel_scaling 27 | 28 | audio = griffin_lim( 29 | torch.autograd.Variable(spec_from_mel[:, :, :-1]), _stft._stft_fn, griffin_iters 30 | ) 31 | 32 | audio = audio.squeeze() 33 | audio = audio.cpu().numpy() 34 | audio_path = out_filename 35 | write(audio_path, _stft.sampling_rate, audio) 36 | -------------------------------------------------------------------------------- /src/utilities/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import Dataset 2 | -------------------------------------------------------------------------------- /src/utilities/data/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/data/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/utilities/data/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/data/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/utilities/data/__pycache__/add_on.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/data/__pycache__/add_on.cpython-38.pyc -------------------------------------------------------------------------------- /src/utilities/data/__pycache__/add_on.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/data/__pycache__/add_on.cpython-39.pyc -------------------------------------------------------------------------------- /src/utilities/data/__pycache__/big_vgan_mel.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/data/__pycache__/big_vgan_mel.cpython-38.pyc -------------------------------------------------------------------------------- /src/utilities/data/__pycache__/big_vgan_mel.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/data/__pycache__/big_vgan_mel.cpython-39.pyc -------------------------------------------------------------------------------- /src/utilities/data/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/data/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /src/utilities/data/__pycache__/dataset.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/src/utilities/data/__pycache__/dataset.cpython-39.pyc -------------------------------------------------------------------------------- /src/utilities/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | import numpy as np 6 | 7 | import bigvgan 8 | 9 | 10 | def get_available_checkpoint_keys(model, ckpt): 11 | print("==> Attempt to reload from %s" % ckpt) 12 | state_dict = torch.load(ckpt)["state_dict"] 13 | current_state_dict = model.state_dict() 14 | new_state_dict = {} 15 | for k in state_dict.keys(): 16 | if ( 17 | k in current_state_dict.keys() 18 | and current_state_dict[k].size() == state_dict[k].size() 19 | ): 20 | new_state_dict[k] = state_dict[k] 21 | else: 22 | print("==> WARNING: Skipping %s" % k) 23 | print( 24 | "%s out of %s keys are matched" 25 | % (len(new_state_dict.keys()), len(state_dict.keys())) 26 | ) 27 | return new_state_dict 28 | 29 | def get_param_num(model): 30 | num_param = sum(param.numel() for param in model.parameters()) 31 | return num_param 32 | 33 | def torch_version_orig_mod_remove(state_dict): 34 | new_state_dict = {} 35 | new_state_dict["generator"] = {} 36 | for key in state_dict["generator"].keys(): 37 | if("_orig_mod." in key): 38 | new_state_dict["generator"][key.replace("_orig_mod.","")] = state_dict["generator"][key] 39 | else: 40 | new_state_dict["generator"][key] = state_dict["generator"][key] 41 | return new_state_dict 42 | 43 | def get_vocoder(config, device, mel_bins): 44 | 45 | with open("src/bigvgan/config.json", "r") as f: 46 | config = json.load(f) 47 | config = bigvgan.AttrDict(config) 48 | vocoder = bigvgan.BigVGAN(config) 49 | print("Load bigvgan_generator_16k") 50 | ckpt = torch.load("src/bigvgan/g_01000000") 51 | vocoder.load_state_dict(ckpt["generator"]) 52 | vocoder.eval() 53 | vocoder.remove_weight_norm() 54 | vocoder.to(device) 55 | 56 | return vocoder 57 | 58 | 59 | def vocoder_infer(mels, vocoder, lengths=None): 60 | with torch.no_grad(): 61 | wavs = vocoder(mels).squeeze(1) 62 | 63 | wavs = (wavs.cpu().numpy() * 32768).astype("int16") 64 | 65 | if lengths is not None: 66 | wavs = wavs[:, :lengths] 67 | 68 | # wavs = [wav for wav in wavs] 69 | 70 | # for i in range(len(mels)): 71 | # if lengths is not None: 72 | # wavs[i] = wavs[i][: lengths[i]] 73 | 74 | return wavs 75 | -------------------------------------------------------------------------------- /taming/modules/autoencoder/lpips/vgg.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Audio-AGI/FlowSep/d8164db58bd461ef5bb6df8ffd372b536ee6afb4/taming/modules/autoencoder/lpips/vgg.pth --------------------------------------------------------------------------------
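Usage note: the helpers in /src/utilities/model.py above (get_vocoder, vocoder_infer) wrap the bundled BigVGAN checkpoint under src/bigvgan/. The sketch below is a minimal, hypothetical way to drive them on their own; it assumes the script is run from the repository root with src/ on PYTHONPATH (so that utilities.model and bigvgan import the same way they do in-repo), a mel batch shaped [batch, mel_bins, frames] with 64 mel bins, and a 16 kHz output rate suggested by the "bigvgan_generator_16k" checkpoint name. None of these values are confirmed by this listing.

import torch
from scipy.io.wavfile import write

from utilities.model import get_vocoder, vocoder_infer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# get_vocoder loads src/bigvgan/config.json and src/bigvgan/g_01000000 itself,
# so the config and mel_bins arguments are effectively placeholders here.
vocoder = get_vocoder(config=None, device=device, mel_bins=64)

# Stand-in mel batch: 1 clip, 64 mel bins, 500 frames (illustrative shape only).
mels = torch.randn(1, 64, 500, device=device)

# vocoder_infer returns an int16 numpy array of shape [batch, samples].
wavs = vocoder_infer(mels, vocoder)
write("vocoder_demo.wav", 16000, wavs[0])  # 16 kHz assumed, see note above

In the repository itself, mels would come from the latent decoder / get_mel_from_wav pipeline rather than torch.randn; the random tensor is only there to keep the sketch self-contained.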