├── .DS_Store ├── LICENSE ├── README.md ├── assets ├── .DS_Store ├── compare.png ├── gen.png └── teaser.png ├── combine_npz.py ├── configs └── WeToK │ ├── GenStage │ ├── imagenet_conditional_llama_B_GFQ.yaml │ ├── imagenet_conditional_llama_L_GFQ.yaml │ └── imagenet_conditional_llama_XL_GFQ.yaml │ └── Inference │ ├── GeneralDomain_compratio192_imagenet.yaml │ ├── GeneralDomain_compratio192_mscoco.yaml │ ├── GeneralDomain_compratio24_imagenet.yaml │ ├── GeneralDomain_compratio24_mscoco.yaml │ ├── GeneralDomain_compratio48_imagenet.yaml │ ├── GeneralDomain_compratio48_mscoco.yaml │ ├── GeneralDomain_compratio768_imagenet.yaml │ ├── GeneralDomain_compratio768_mscoco.yaml │ ├── ImageNet_downsample16_imagenet.yaml │ └── ImageNet_downsample8_imagenet.yaml ├── env.sh ├── evaluation_image_ddp.py ├── evaluation_original_reso_dist.py ├── main.py ├── metrics ├── fid.py ├── fvd.py ├── inception.py └── pytorch_i3d.py ├── reconstruct_image.py ├── sample.py ├── sample_evaluator.py ├── scripts ├── evaluation │ ├── evalation_sample.sh │ ├── imagenet_evaluation_256_dist.sh │ ├── imagenet_evaluation_original_dist.sh │ ├── mscoco_evaluation_original_dist.sh │ ├── mscocoval_evaluation_256_dist.sh │ └── sample_gpu_ddp.sh ├── inference │ └── reconstruct_image.sh └── train_autogressive │ └── GenStage │ ├── imagenet_conditional_llama_B_GFQ.sh │ ├── imagenet_conditional_llama_L_GFQ.sh │ └── imagenet_conditional_llama_XL_GFQ.sh └── src ├── .DS_Store └── WeTok ├── data ├── base.py ├── functional.py ├── helper_types.py ├── imagenet.py ├── mscoco.py ├── prepare_pretrain.py ├── pretrain.py ├── rand_augment.py ├── random_erasing.py ├── utils.py └── volume_transforms.py ├── lr_scheduler.py ├── models ├── cond_transformer.py ├── cond_transformer_gpt.py ├── dummy_cond_stage.py ├── lfqgan.py ├── lfqgan_pretrain.py └── lfqgan_pretrain_gan_decoder.py ├── modules ├── autoencoder │ └── lpips │ │ └── vgg.pth ├── diffusionmodules │ ├── improved_model.py │ ├── improved_video_model.py │ └── model.py ├── discriminator │ └── model.py ├── ema.py ├── losses │ ├── __init__.py │ ├── lpips.py │ ├── segmentation.py │ └── vqperceptual.py ├── scheduler │ └── lr_scheduler.py ├── transformer │ ├── gpt.py │ ├── mingpt.py │ └── permuter.py ├── util.py └── vqvae │ ├── lookup_free_quantize.py │ └── quantize.py └── util.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/.DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/README.md -------------------------------------------------------------------------------- /assets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/assets/.DS_Store -------------------------------------------------------------------------------- /assets/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/assets/compare.png -------------------------------------------------------------------------------- /assets/gen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/assets/gen.png -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/assets/teaser.png -------------------------------------------------------------------------------- /combine_npz.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/combine_npz.py -------------------------------------------------------------------------------- /configs/WeToK/GenStage/imagenet_conditional_llama_B_GFQ.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/GenStage/imagenet_conditional_llama_B_GFQ.yaml -------------------------------------------------------------------------------- /configs/WeToK/GenStage/imagenet_conditional_llama_L_GFQ.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/GenStage/imagenet_conditional_llama_L_GFQ.yaml -------------------------------------------------------------------------------- /configs/WeToK/GenStage/imagenet_conditional_llama_XL_GFQ.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/GenStage/imagenet_conditional_llama_XL_GFQ.yaml -------------------------------------------------------------------------------- /configs/WeToK/Inference/GeneralDomain_compratio192_imagenet.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/Inference/GeneralDomain_compratio192_imagenet.yaml -------------------------------------------------------------------------------- /configs/WeToK/Inference/GeneralDomain_compratio192_mscoco.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/Inference/GeneralDomain_compratio192_mscoco.yaml -------------------------------------------------------------------------------- /configs/WeToK/Inference/GeneralDomain_compratio24_imagenet.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/Inference/GeneralDomain_compratio24_imagenet.yaml -------------------------------------------------------------------------------- /configs/WeToK/Inference/GeneralDomain_compratio24_mscoco.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/Inference/GeneralDomain_compratio24_mscoco.yaml -------------------------------------------------------------------------------- /configs/WeToK/Inference/GeneralDomain_compratio48_imagenet.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/Inference/GeneralDomain_compratio48_imagenet.yaml -------------------------------------------------------------------------------- /configs/WeToK/Inference/GeneralDomain_compratio48_mscoco.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/Inference/GeneralDomain_compratio48_mscoco.yaml -------------------------------------------------------------------------------- /configs/WeToK/Inference/GeneralDomain_compratio768_imagenet.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/Inference/GeneralDomain_compratio768_imagenet.yaml -------------------------------------------------------------------------------- /configs/WeToK/Inference/GeneralDomain_compratio768_mscoco.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/Inference/GeneralDomain_compratio768_mscoco.yaml -------------------------------------------------------------------------------- /configs/WeToK/Inference/ImageNet_downsample16_imagenet.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/Inference/ImageNet_downsample16_imagenet.yaml -------------------------------------------------------------------------------- /configs/WeToK/Inference/ImageNet_downsample8_imagenet.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/configs/WeToK/Inference/ImageNet_downsample8_imagenet.yaml -------------------------------------------------------------------------------- /env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/env.sh -------------------------------------------------------------------------------- /evaluation_image_ddp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/evaluation_image_ddp.py -------------------------------------------------------------------------------- /evaluation_original_reso_dist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/evaluation_original_reso_dist.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/main.py -------------------------------------------------------------------------------- /metrics/fid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/metrics/fid.py -------------------------------------------------------------------------------- /metrics/fvd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/metrics/fvd.py -------------------------------------------------------------------------------- /metrics/inception.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/metrics/inception.py -------------------------------------------------------------------------------- /metrics/pytorch_i3d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/metrics/pytorch_i3d.py -------------------------------------------------------------------------------- /reconstruct_image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/reconstruct_image.py -------------------------------------------------------------------------------- /sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/sample.py -------------------------------------------------------------------------------- /sample_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/sample_evaluator.py -------------------------------------------------------------------------------- /scripts/evaluation/evalation_sample.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/scripts/evaluation/evalation_sample.sh -------------------------------------------------------------------------------- /scripts/evaluation/imagenet_evaluation_256_dist.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/scripts/evaluation/imagenet_evaluation_256_dist.sh -------------------------------------------------------------------------------- /scripts/evaluation/imagenet_evaluation_original_dist.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/scripts/evaluation/imagenet_evaluation_original_dist.sh -------------------------------------------------------------------------------- /scripts/evaluation/mscoco_evaluation_original_dist.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/scripts/evaluation/mscoco_evaluation_original_dist.sh -------------------------------------------------------------------------------- /scripts/evaluation/mscocoval_evaluation_256_dist.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/scripts/evaluation/mscocoval_evaluation_256_dist.sh -------------------------------------------------------------------------------- /scripts/evaluation/sample_gpu_ddp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/scripts/evaluation/sample_gpu_ddp.sh -------------------------------------------------------------------------------- /scripts/inference/reconstruct_image.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/scripts/inference/reconstruct_image.sh -------------------------------------------------------------------------------- /scripts/train_autogressive/GenStage/imagenet_conditional_llama_B_GFQ.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/scripts/train_autogressive/GenStage/imagenet_conditional_llama_B_GFQ.sh -------------------------------------------------------------------------------- /scripts/train_autogressive/GenStage/imagenet_conditional_llama_L_GFQ.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/scripts/train_autogressive/GenStage/imagenet_conditional_llama_L_GFQ.sh -------------------------------------------------------------------------------- /scripts/train_autogressive/GenStage/imagenet_conditional_llama_XL_GFQ.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/scripts/train_autogressive/GenStage/imagenet_conditional_llama_XL_GFQ.sh -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/.DS_Store -------------------------------------------------------------------------------- /src/WeTok/data/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/data/base.py -------------------------------------------------------------------------------- /src/WeTok/data/functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/data/functional.py -------------------------------------------------------------------------------- /src/WeTok/data/helper_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/data/helper_types.py -------------------------------------------------------------------------------- /src/WeTok/data/imagenet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/data/imagenet.py -------------------------------------------------------------------------------- /src/WeTok/data/mscoco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/data/mscoco.py -------------------------------------------------------------------------------- /src/WeTok/data/prepare_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/data/prepare_pretrain.py -------------------------------------------------------------------------------- /src/WeTok/data/pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/data/pretrain.py -------------------------------------------------------------------------------- /src/WeTok/data/rand_augment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/data/rand_augment.py -------------------------------------------------------------------------------- /src/WeTok/data/random_erasing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/data/random_erasing.py -------------------------------------------------------------------------------- /src/WeTok/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/data/utils.py -------------------------------------------------------------------------------- /src/WeTok/data/volume_transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/data/volume_transforms.py -------------------------------------------------------------------------------- /src/WeTok/lr_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/lr_scheduler.py -------------------------------------------------------------------------------- /src/WeTok/models/cond_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/models/cond_transformer.py -------------------------------------------------------------------------------- /src/WeTok/models/cond_transformer_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/models/cond_transformer_gpt.py -------------------------------------------------------------------------------- /src/WeTok/models/dummy_cond_stage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/models/dummy_cond_stage.py -------------------------------------------------------------------------------- /src/WeTok/models/lfqgan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/models/lfqgan.py -------------------------------------------------------------------------------- /src/WeTok/models/lfqgan_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/models/lfqgan_pretrain.py -------------------------------------------------------------------------------- /src/WeTok/models/lfqgan_pretrain_gan_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/models/lfqgan_pretrain_gan_decoder.py -------------------------------------------------------------------------------- /src/WeTok/modules/autoencoder/lpips/vgg.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/autoencoder/lpips/vgg.pth -------------------------------------------------------------------------------- /src/WeTok/modules/diffusionmodules/improved_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/diffusionmodules/improved_model.py -------------------------------------------------------------------------------- /src/WeTok/modules/diffusionmodules/improved_video_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/diffusionmodules/improved_video_model.py -------------------------------------------------------------------------------- /src/WeTok/modules/diffusionmodules/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/diffusionmodules/model.py -------------------------------------------------------------------------------- /src/WeTok/modules/discriminator/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/discriminator/model.py -------------------------------------------------------------------------------- /src/WeTok/modules/ema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/ema.py -------------------------------------------------------------------------------- /src/WeTok/modules/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/losses/__init__.py -------------------------------------------------------------------------------- /src/WeTok/modules/losses/lpips.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/losses/lpips.py -------------------------------------------------------------------------------- /src/WeTok/modules/losses/segmentation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/losses/segmentation.py -------------------------------------------------------------------------------- /src/WeTok/modules/losses/vqperceptual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/losses/vqperceptual.py -------------------------------------------------------------------------------- /src/WeTok/modules/scheduler/lr_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/scheduler/lr_scheduler.py -------------------------------------------------------------------------------- /src/WeTok/modules/transformer/gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/transformer/gpt.py -------------------------------------------------------------------------------- /src/WeTok/modules/transformer/mingpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/transformer/mingpt.py -------------------------------------------------------------------------------- /src/WeTok/modules/transformer/permuter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/transformer/permuter.py -------------------------------------------------------------------------------- /src/WeTok/modules/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/util.py -------------------------------------------------------------------------------- /src/WeTok/modules/vqvae/lookup_free_quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/vqvae/lookup_free_quantize.py -------------------------------------------------------------------------------- /src/WeTok/modules/vqvae/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/modules/vqvae/quantize.py -------------------------------------------------------------------------------- /src/WeTok/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuangshaobin/WeTok/HEAD/src/WeTok/util.py --------------------------------------------------------------------------------